Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
2 : #include "googlesql/public/types/array_type.h"
3 :
4 : // R9: Anti-join over QUALIFY-deduped views — DuckDB binder "column id not
5 : // found". Plan:
6 : // .cursor/plans/conformance-hardening/07-reported-bug-regression-fixtures.plan.md
7 : // See also:
8 : // .cursor/plans/conformance-hardening/06-transpiler-binding-property-tests.plan.md
9 :
10 : namespace bigquery_emulator {
11 : namespace backend {
12 : namespace engine {
13 : namespace duckdb {
14 : namespace transpiler {
15 : namespace {
16 :
17 : constexpr int kCompositionGeneratorSeed = 0x06060606;
18 : constexpr int kCompositionGeneratorCases = 24;
19 : constexpr int kDistinctAfterDedupGeneratorCases = 12;
20 :
21 : std::string WrapQualifyDedupSubquery(absl::string_view partition_col,
22 26 : absl::string_view inner_sql) {
23 26 : return absl::StrCat(
24 26 : "SELECT * FROM (SELECT *, ROW_NUMBER() OVER (PARTITION BY ",
25 26 : partition_col,
26 26 : " ORDER BY ",
27 26 : partition_col,
28 26 : " DESC) AS rn FROM (",
29 26 : inner_sql,
30 26 : ")) WHERE rn = 1");
31 26 : }
32 :
33 84 : uint32_t LcgNext(uint32_t* state) {
34 84 : *state = *state * 1664525u + 1013904223u;
35 84 : return *state;
36 84 : }
37 :
38 : } // namespace
39 :
40 : class TranspilerCompositionTest : public TranspilerBindFixture {
41 : protected:
42 10 : void SetUp() override {
43 10 : TranspilerBindFixture::SetUp();
44 :
45 10 : const ::googlesql::ArrayType* profile_tags_type = nullptr;
46 10 : ASSERT_TRUE(
47 10 : type_factory_
48 10 : ->MakeArrayType(type_factory_->get_string(), &profile_tags_type)
49 10 : .ok());
50 10 : auto profiles = std::make_unique<::googlesql::SimpleTable>(
51 10 : "profiles",
52 10 : std::vector<::googlesql::SimpleTable::NameAndType>{
53 10 : {"id", type_factory_->get_int64()},
54 10 : {"name", type_factory_->get_string()},
55 10 : });
56 10 : catalog_->AddOwnedTable(std::move(profiles));
57 :
58 10 : auto dedup_profiles = std::make_unique<::googlesql::SimpleTable>(
59 10 : "dedup_profiles",
60 10 : std::vector<::googlesql::SimpleTable::NameAndType>{
61 10 : {"id", type_factory_->get_int64()},
62 10 : {"city", type_factory_->get_string()},
63 10 : {"tags", profile_tags_type},
64 10 : {"source_updated_at", type_factory_->get_timestamp()},
65 10 : });
66 10 : catalog_->AddOwnedTable(std::move(dedup_profiles));
67 :
68 10 : auto bq_orders = std::make_unique<::googlesql::SimpleTable>(
69 10 : "bq_orders",
70 10 : std::vector<::googlesql::SimpleTable::NameAndType>{
71 10 : {"order_id", type_factory_->get_int64()},
72 10 : {"customer_id", type_factory_->get_int64()},
73 10 : });
74 10 : catalog_->AddOwnedTable(std::move(bq_orders));
75 :
76 10 : const ::googlesql::ArrayType* vals_type = nullptr;
77 10 : ASSERT_TRUE(
78 10 : type_factory_->MakeArrayType(type_factory_->get_int64(), &vals_type)
79 10 : .ok());
80 10 : auto items = std::make_unique<::googlesql::SimpleTable>(
81 10 : "items",
82 10 : std::vector<::googlesql::SimpleTable::NameAndType>{
83 10 : {"id", type_factory_->get_int64()},
84 10 : {"vals", vals_type},
85 10 : });
86 10 : catalog_->AddOwnedTable(std::move(items));
87 :
88 10 : const ::googlesql::ArrayType* tags_type = nullptr;
89 10 : ASSERT_TRUE(
90 10 : type_factory_->MakeArrayType(type_factory_->get_string(), &tags_type)
91 10 : .ok());
92 10 : auto arrays = std::make_unique<::googlesql::SimpleTable>(
93 10 : "arrays",
94 10 : std::vector<::googlesql::SimpleTable::NameAndType>{
95 10 : {"tags", tags_type},
96 10 : });
97 10 : catalog_->AddOwnedTable(std::move(arrays));
98 :
99 10 : ExecDdl("CREATE TABLE bq_orders (order_id BIGINT, customer_id BIGINT)");
100 10 : ExecDdl("CREATE TABLE profiles (id BIGINT, name VARCHAR)");
101 10 : ExecDdl("CREATE TABLE people (id BIGINT, name VARCHAR)");
102 10 : ExecDdl("CREATE TABLE items (id BIGINT, vals BIGINT[])");
103 10 : ExecDdl("CREATE TABLE arrays (tags STRING[])");
104 10 : ExecDdl(
105 10 : "CREATE TABLE dedup_profiles (id BIGINT, city VARCHAR, tags STRING[], "
106 10 : "source_updated_at TIMESTAMPTZ)");
107 10 : }
108 : };
109 :
110 1 : TEST_F(TranspilerCompositionTest, DistinctCityAfterQualifyDedupBinds) {
111 1 : static constexpr const char kSql[] = R"sql(
112 1 : SELECT DISTINCT city
113 1 : FROM (
114 1 : SELECT * FROM dedup_profiles
115 1 : QUALIFY ROW_NUMBER() OVER (PARTITION BY id ORDER BY source_updated_at DESC) = 1
116 1 : )
117 1 : WHERE city IS NOT NULL
118 1 : )sql";
119 1 : AssertSqlTranspileBinds(kSql);
120 1 : }
121 :
122 1 : TEST_F(TranspilerCompositionTest, DistinctUnnestAfterQualifyDedupBinds) {
123 1 : static constexpr const char kSql[] = R"sql(
124 1 : SELECT DISTINCT tag
125 1 : FROM (
126 1 : SELECT * FROM dedup_profiles
127 1 : QUALIFY ROW_NUMBER() OVER (PARTITION BY id ORDER BY source_updated_at DESC) = 1
128 1 : ), UNNEST(tags) AS tag
129 1 : )sql";
130 1 : AssertSqlTranspileBinds(kSql);
131 1 : }
132 :
133 1 : TEST_F(TranspilerCompositionTest, CorrelatedUnnestFromTableBinds) {
134 1 : static constexpr const char kSql[] = R"sql(
135 1 : SELECT id, n
136 1 : FROM items, UNNEST(items.vals) AS n
137 1 : ORDER BY id, n
138 1 : )sql";
139 1 : AssertSqlTranspileBinds(kSql);
140 1 : }
141 :
142 1 : TEST_F(TranspilerCompositionTest, CoreUsageUnnestArrayShapeBinds) {
143 1 : static constexpr const char kSql[] = R"sql(
144 1 : SELECT tag FROM arrays, UNNEST(tags) AS tag
145 1 : )sql";
146 1 : AssertSqlTranspileBinds(kSql);
147 1 : }
148 :
149 1 : TEST_F(TranspilerCompositionTest, NestedUnnestCrossProductBinds) {
150 1 : static constexpr const char kSql[] = R"sql(
151 1 : SELECT n, m
152 1 : FROM UNNEST(GENERATE_ARRAY(1, 2)) AS n
153 1 : CROSS JOIN UNNEST(GENERATE_ARRAY(10, 11)) AS m
154 1 : )sql";
155 1 : AssertSqlTranspileBinds(kSql);
156 1 : }
157 :
158 1 : TEST_F(TranspilerCompositionTest, OrphanOrdersQualifyDedupAntiJoinBinds) {
159 1 : static constexpr const char kSql[] = R"sql(
160 1 : SELECT o.order_id
161 1 : FROM (
162 1 : SELECT * FROM bq_orders
163 1 : QUALIFY ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY order_id) = 1
164 1 : ) o
165 1 : LEFT JOIN (
166 1 : SELECT * FROM profiles
167 1 : QUALIFY ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) = 1
168 1 : ) p ON o.customer_id = p.id
169 1 : WHERE p.id IS NULL
170 1 : ORDER BY o.order_id
171 1 : )sql";
172 1 : AssertSqlTranspileBinds(kSql);
173 1 : }
174 :
175 1 : TEST_F(TranspilerCompositionTest, OrphanOrdersSubqueryDedupAntiJoinBinds) {
176 1 : static constexpr const char kSql[] = R"sql(
177 1 : SELECT o.order_id, o.customer_id
178 1 : FROM (
179 1 : SELECT order_id, customer_id FROM (
180 1 : SELECT *, ROW_NUMBER() OVER (PARTITION BY order_id ORDER BY order_id) AS rn
181 1 : FROM (
182 1 : SELECT 1 AS order_id, 10 AS customer_id UNION ALL
183 1 : SELECT 2 AS order_id, 99 AS customer_id
184 1 : )
185 1 : ) WHERE rn = 1
186 1 : ) o
187 1 : LEFT JOIN (
188 1 : SELECT id FROM (
189 1 : SELECT *, ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS rn
190 1 : FROM (SELECT 10 AS id)
191 1 : ) WHERE rn = 1
192 1 : ) p ON o.customer_id = p.id
193 1 : WHERE o.customer_id IS NOT NULL AND p.id IS NULL
194 1 : )sql";
195 1 : AssertSqlTranspileBinds(kSql);
196 1 : }
197 :
198 1 : TEST_F(TranspilerCompositionTest, NestedQualifyJoinCteExceptBinds) {
199 1 : static constexpr const char kSql[] = R"sql(
200 1 : WITH dedup AS (
201 1 : SELECT id, name FROM (
202 1 : SELECT *, ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS rn
203 1 : FROM people
204 1 : ) WHERE rn = 1
205 1 : )
206 1 : SELECT a.id FROM dedup a
207 1 : LEFT JOIN dedup b ON a.id = b.id
208 1 : WHERE b.id IS NULL
209 1 : EXCEPT DISTINCT
210 1 : SELECT CAST(0 AS INT64) AS id
211 1 : )sql";
212 1 : AssertSqlTranspileBinds(kSql);
213 1 : }
214 :
215 1 : TEST_F(TranspilerCompositionTest, SeededCompositionGeneratorBinds) {
216 1 : uint32_t rng = kCompositionGeneratorSeed;
217 25 : for (int i = 0; i < kCompositionGeneratorCases; ++i) {
218 24 : const int base = static_cast<int>(LcgNext(&rng) % 2);
219 24 : const int wrap1 = static_cast<int>(LcgNext(&rng) % 3);
220 24 : const int wrap2 = static_cast<int>(LcgNext(&rng) % 3);
221 :
222 24 : const char* key_col = base == 0 ? "id" : "order_id";
223 24 : std::string inner = base == 0
224 24 : ? "SELECT id, name FROM people"
225 24 : : "SELECT order_id, customer_id FROM bq_orders";
226 :
227 24 : if (wrap1 == 1) {
228 5 : inner = WrapQualifyDedupSubquery(key_col, inner);
229 19 : } else if (wrap1 == 2) {
230 9 : inner = absl::StrCat("SELECT * EXCEPT(rn) FROM (",
231 9 : WrapQualifyDedupSubquery(key_col, inner),
232 9 : ")");
233 9 : }
234 :
235 24 : std::string sql;
236 24 : if (wrap2 == 0) {
237 6 : sql = absl::StrCat(
238 6 : "SELECT ", key_col, " FROM (", inner, ") t WHERE ", key_col, " >= 0");
239 18 : } else if (wrap2 == 1) {
240 10 : sql = absl::StrCat("WITH w AS (", inner, ") SELECT COUNT(*) AS c FROM w");
241 10 : } else {
242 8 : sql = absl::StrCat("SELECT a.",
243 8 : key_col,
244 8 : " FROM (",
245 8 : inner,
246 8 : ") a LEFT JOIN (",
247 8 : inner,
248 8 : ") b ON a.",
249 8 : key_col,
250 8 : " = b.",
251 8 : key_col,
252 8 : " WHERE b.",
253 8 : key_col,
254 8 : " IS NULL");
255 8 : }
256 :
257 24 : SCOPED_TRACE(absl::StrCat("case=", i, " sql=", sql));
258 24 : AssertSqlTranspileBinds(sql);
259 24 : }
260 1 : }
261 :
262 1 : TEST_F(TranspilerCompositionTest, SeededDistinctAfterDedupGeneratorBinds) {
263 1 : uint32_t rng = kCompositionGeneratorSeed ^ 0xD157111Cu;
264 13 : for (int i = 0; i < kDistinctAfterDedupGeneratorCases; ++i) {
265 12 : const int wrap3 = static_cast<int>(LcgNext(&rng) % 3);
266 12 : const std::string deduped = WrapQualifyDedupSubquery(
267 12 : "id", "SELECT id, city, tags FROM dedup_profiles");
268 :
269 12 : std::string sql;
270 12 : if (wrap3 == 0) {
271 3 : sql = absl::StrCat(
272 3 : "SELECT DISTINCT city FROM (", deduped, ") WHERE city IS NOT NULL");
273 9 : } else if (wrap3 == 1) {
274 4 : sql = absl::StrCat(
275 4 : "SELECT DISTINCT tag FROM (", deduped, "), UNNEST(tags) AS tag");
276 5 : } else {
277 5 : sql = absl::StrCat(
278 5 : "SELECT city, COUNT(*) AS c FROM (", deduped, ") GROUP BY city");
279 5 : }
280 :
281 12 : SCOPED_TRACE(absl::StrCat("distinct_case=", i, " sql=", sql));
282 12 : AssertSqlTranspileBinds(sql);
283 12 : }
284 1 : }
285 :
286 : } // namespace transpiler
287 : } // namespace duckdb
288 : } // namespace engine
289 : } // namespace backend
290 : } // namespace bigquery_emulator
|