Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
2 :
3 : namespace bigquery_emulator {
4 : namespace backend {
5 : namespace engine {
6 : namespace duckdb {
7 : namespace transpiler {
8 :
9 1 : TEST_F(TranspilerTest, EmitWithScanSingleCteSelectAll) {
10 : // Single-binding non-recursive CTE referenced once. The CTE body
11 : // is a plain TableScan over `people`; the ref reads two columns
12 : // back out.
13 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
14 1 : "WITH p AS (SELECT id, name FROM people) "
15 1 : "SELECT id, name FROM p");
16 1 : ASSERT_NE(stmt, nullptr);
17 1 : TestTranspiler t;
18 1 : std::string sql = t.Transpile(stmt);
19 : // The outermost EmitQueryStmt wraps the WITH body as a derived
20 : // table for the user-visible alias mapping; the CTE itself
21 : // anchors its columns positionally. The exact wrapping shape is
22 : // an emit-stability contract -- a regression that drops the
23 : // anchor or renames the CTE shows up here.
24 2 : EXPECT_NE(sql.find("WITH \"p\" AS ("), std::string::npos)
25 2 : << "expected non-recursive CTE header; got: " << sql;
26 2 : EXPECT_NE(sql.find(" AS \"_cte_0\""), std::string::npos)
27 2 : << "expected positional anchor for column 0; got: " << sql;
28 2 : EXPECT_NE(sql.find(" AS \"_cte_1\""), std::string::npos)
29 2 : << "expected positional anchor for column 1; got: " << sql;
30 2 : EXPECT_NE(sql.find(" FROM \"p\""), std::string::npos)
31 2 : << "expected ref scan to FROM the CTE name; got: " << sql;
32 1 : }
33 :
34 1 : TEST_F(TranspilerTest, EmitWithScanMultipleCtesDistinctNames) {
35 : // Two CTEs in one WITH clause, each bound to a separate ref.
36 : // The emit must produce both CTE headers and both ref-side
37 : // SELECTs without cross-pollution of column names.
38 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
39 1 : "WITH p AS (SELECT id FROM people), "
40 1 : " o AS (SELECT order_id FROM orders) "
41 1 : "SELECT p.id, o.order_id FROM p, o");
42 1 : ASSERT_NE(stmt, nullptr);
43 1 : TestTranspiler t;
44 1 : std::string sql = t.Transpile(stmt);
45 2 : EXPECT_NE(sql.find("WITH \"p\" AS ("), std::string::npos)
46 2 : << "expected first CTE header; got: " << sql;
47 2 : EXPECT_NE(sql.find("\"o\" AS ("), std::string::npos)
48 2 : << "expected second CTE header; got: " << sql;
49 2 : EXPECT_NE(sql.find(" FROM \"p\""), std::string::npos)
50 2 : << "expected ref to first CTE; got: " << sql;
51 2 : EXPECT_NE(sql.find(" FROM \"o\""), std::string::npos)
52 2 : << "expected ref to second CTE; got: " << sql;
53 1 : }
54 :
55 1 : TEST_F(TranspilerTest, EmitWithScanCteReferencedTwice) {
56 : // A single CTE referenced from two different scan positions.
57 : // The positional-anchor + per-ref rename scheme means each ref
58 : // independently renames the anchor to its own column names,
59 : // and the two refs cannot collide on a shared analyzer name.
60 : // SELF-JOIN form keeps both refs in scope at once.
61 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
62 1 : "WITH p AS (SELECT id, name FROM people) "
63 1 : "SELECT a.id, b.name FROM p AS a, p AS b WHERE a.id = b.id");
64 1 : ASSERT_NE(stmt, nullptr);
65 1 : TestTranspiler t;
66 1 : std::string sql = t.Transpile(stmt);
67 : // `a.id = b.id` lowers through `$equal` which is not yet in the
68 : // function disposition table for transpiled emit. That makes
69 : // the FilterScan return "" -- which then bubbles up through the
70 : // outer QueryStmt emit. The empty-string contract here is the
71 : // right answer for now; we still pin the CTE shape via the
72 : // single-ref test above. Smoke-test that no partial / malformed
73 : // emit gets through.
74 2 : EXPECT_TRUE(sql.empty() || sql.find("WITH \"p\" AS (") != std::string::npos)
75 2 : << "expected either empty (FilterScan fallback) or a WITH header; got: "
76 2 : << sql;
77 1 : }
78 :
79 1 : TEST_F(TranspilerTest, EmitWithScanRecursivePropagatesKeyword) {
80 : // `WITH RECURSIVE` now lowers through `EmitRecursiveScan`
81 : // (advanced-relational-routing Family 4), so the prior
82 : // "bails-to-empty" defense-in-depth has been promoted to a
83 : // real emit. This hand-built shape mirrors what the analyzer
84 : // produces for a trivial `WITH RECURSIVE r AS (SELECT 1 UNION
85 : // ALL SELECT 1) SELECT 1` -- bypassing the analyzer keeps the
86 : // test independent of the (currently-unmapped) `$less` /
87 : // `$greater` disposition entries that a non-trivial recursive
88 : // CTE relies on.
89 : //
90 : // The check confirms `WITH RECURSIVE` makes it into the emitted
91 : // SQL whenever the WithScan's `recursive()` flag is set.
92 1 : ::googlesql::ResolvedColumn anchor_col(
93 1 : /*column_id=*/200,
94 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("$rec"),
95 1 : /*name=*/::googlesql::IdString::MakeGlobal("n"),
96 1 : type_factory_->get_int64());
97 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedComputedColumn>>
98 1 : anchor_exprs;
99 1 : anchor_exprs.push_back(::googlesql::MakeResolvedComputedColumn(
100 1 : anchor_col,
101 1 : ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(1))));
102 1 : auto anchor_project = ::googlesql::MakeResolvedProjectScan(
103 1 : /*column_list=*/{anchor_col},
104 1 : std::move(anchor_exprs),
105 1 : ::googlesql::MakeResolvedSingleRowScan());
106 1 : auto anchor_item = ::googlesql::MakeResolvedSetOperationItem(
107 1 : std::move(anchor_project), /*output_column_list=*/{anchor_col});
108 1 : ::googlesql::ResolvedColumn rec_col(
109 1 : /*column_id=*/201,
110 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("$rec"),
111 1 : /*name=*/::googlesql::IdString::MakeGlobal("n"),
112 1 : type_factory_->get_int64());
113 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedComputedColumn>>
114 1 : rec_exprs;
115 1 : rec_exprs.push_back(::googlesql::MakeResolvedComputedColumn(
116 1 : rec_col, ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(2))));
117 1 : auto rec_project = ::googlesql::MakeResolvedProjectScan(
118 1 : /*column_list=*/{rec_col},
119 1 : std::move(rec_exprs),
120 1 : ::googlesql::MakeResolvedSingleRowScan());
121 1 : auto rec_item = ::googlesql::MakeResolvedSetOperationItem(
122 1 : std::move(rec_project), /*output_column_list=*/{rec_col});
123 1 : auto recursive_scan = ::googlesql::MakeResolvedRecursiveScan(
124 1 : /*column_list=*/{anchor_col},
125 1 : ::googlesql::ResolvedRecursiveScan::UNION_ALL,
126 1 : std::move(anchor_item),
127 1 : std::move(rec_item),
128 1 : /*recursion_depth_modifier=*/nullptr);
129 1 : auto entry =
130 1 : ::googlesql::MakeResolvedWithEntry("r", std::move(recursive_scan));
131 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedWithEntry>> entries;
132 1 : entries.push_back(std::move(entry));
133 1 : auto with_scan = ::googlesql::MakeResolvedWithScan(
134 1 : /*column_list=*/{},
135 1 : std::move(entries),
136 1 : ::googlesql::MakeResolvedSingleRowScan(),
137 1 : /*recursive=*/true);
138 1 : TestTranspiler t;
139 1 : std::string sql = t.EmitWithScan(with_scan.get());
140 2 : EXPECT_NE(sql.find("WITH RECURSIVE \"r\""), std::string::npos)
141 2 : << "expected WITH RECURSIVE keyword; got: " << sql;
142 2 : EXPECT_NE(sql.find(" UNION ALL "), std::string::npos)
143 2 : << "expected UNION ALL between anchor and recursive arm; got: " << sql;
144 1 : }
145 :
146 1 : TEST_F(TranspilerTest, EmitRecursiveScanWithDepthThreadsCounter) {
147 1 : ::googlesql::ResolvedColumn n_col(
148 1 : /*column_id=*/200,
149 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("$rec"),
150 1 : /*name=*/::googlesql::IdString::MakeGlobal("n"),
151 1 : type_factory_->get_int64());
152 1 : ::googlesql::ResolvedColumn depth_col(
153 1 : /*column_id=*/201,
154 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("$rec"),
155 1 : /*name=*/::googlesql::IdString::MakeGlobal("depth"),
156 1 : type_factory_->get_int64());
157 1 : auto depth_mod = ::googlesql::MakeResolvedRecursionDepthModifier(
158 1 : /*lower_bound=*/nullptr,
159 1 : /*upper_bound=*/nullptr,
160 1 : ::googlesql::MakeResolvedColumnHolder(depth_col));
161 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedComputedColumn>>
162 1 : anchor_exprs;
163 1 : anchor_exprs.push_back(::googlesql::MakeResolvedComputedColumn(
164 1 : n_col, ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(1))));
165 1 : auto anchor_project = ::googlesql::MakeResolvedProjectScan(
166 1 : /*column_list=*/{n_col},
167 1 : std::move(anchor_exprs),
168 1 : ::googlesql::MakeResolvedSingleRowScan());
169 1 : auto anchor_item = ::googlesql::MakeResolvedSetOperationItem(
170 1 : std::move(anchor_project), /*output_column_list=*/{n_col, depth_col});
171 1 : ::googlesql::ResolvedColumn rec_n_col(
172 1 : /*column_id=*/202,
173 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("$rec"),
174 1 : /*name=*/::googlesql::IdString::MakeGlobal("n"),
175 1 : type_factory_->get_int64());
176 1 : ::googlesql::ResolvedColumn rec_depth_col(
177 1 : /*column_id=*/203,
178 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("$rec"),
179 1 : /*name=*/::googlesql::IdString::MakeGlobal("depth"),
180 1 : type_factory_->get_int64());
181 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedComputedColumn>>
182 1 : rec_exprs;
183 1 : rec_exprs.push_back(::googlesql::MakeResolvedComputedColumn(
184 1 : rec_n_col,
185 1 : ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(2))));
186 1 : auto rec_project = ::googlesql::MakeResolvedProjectScan(
187 1 : /*column_list=*/{rec_n_col},
188 1 : std::move(rec_exprs),
189 1 : ::googlesql::MakeResolvedSingleRowScan());
190 1 : auto rec_item = ::googlesql::MakeResolvedSetOperationItem(
191 1 : std::move(rec_project),
192 1 : /*output_column_list=*/{rec_n_col, rec_depth_col});
193 1 : auto recursive_scan = ::googlesql::MakeResolvedRecursiveScan(
194 1 : /*column_list=*/{n_col, depth_col},
195 1 : ::googlesql::ResolvedRecursiveScan::UNION_ALL,
196 1 : std::move(anchor_item),
197 1 : std::move(rec_item),
198 1 : std::move(depth_mod));
199 1 : auto entry =
200 1 : ::googlesql::MakeResolvedWithEntry("r", std::move(recursive_scan));
201 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedWithEntry>> entries;
202 1 : entries.push_back(std::move(entry));
203 1 : auto with_scan = ::googlesql::MakeResolvedWithScan(
204 1 : /*column_list=*/{},
205 1 : std::move(entries),
206 1 : ::googlesql::MakeResolvedSingleRowScan(),
207 1 : /*recursive=*/true);
208 1 : TestTranspiler t;
209 1 : std::string sql = t.EmitWithScan(with_scan.get());
210 2 : EXPECT_NE(sql.find("0 AS \"_cte_1\""), std::string::npos)
211 2 : << "expected anchor depth 0; got: " << sql;
212 2 : EXPECT_NE(sql.find("\"depth\" + 1 AS \"_cte_1\""), std::string::npos)
213 2 : << "expected recursive depth increment; got: " << sql;
214 1 : }
215 :
216 1 : TEST_F(TranspilerTest, EmitWithRefScanBareDirect) {
217 : // Direct-construction of a ResolvedWithRefScan so we can pin the
218 : // per-column rename without depending on the surrounding
219 : // WithScan setup. The ref scan declares two columns; the emit
220 : // produces the `SELECT "_cte_0" AS "<n0>", "_cte_1" AS "<n1>"
221 : // FROM "<name>"` shape.
222 1 : ::googlesql::ResolvedColumn c0(
223 1 : /*column_id=*/10,
224 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("p"),
225 1 : /*name=*/::googlesql::IdString::MakeGlobal("id"),
226 1 : type_factory_->get_int64());
227 1 : ::googlesql::ResolvedColumn c1(
228 1 : /*column_id=*/11,
229 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("p"),
230 1 : /*name=*/::googlesql::IdString::MakeGlobal("name"),
231 1 : type_factory_->get_string());
232 1 : auto ref = ::googlesql::MakeResolvedWithRefScan({c0, c1}, "p");
233 1 : TestTranspiler t;
234 1 : EXPECT_EQ(t.EmitWithRefScan(ref.get()),
235 1 : "SELECT \"_cte_0\" AS \"id\", \"_cte_1\" AS \"name\" FROM \"p\"");
236 1 : }
237 :
238 : // --- ResolvedSubqueryExpr (non-correlated) -----------------------------
239 : //
240 : // `docs/ENGINE_POLICY.md` Family 2. Non-correlated SCALAR /
241 : // IN / EXISTS / ARRAY subqueries lower to DuckDB's native subquery
242 : // surface. Correlated forms (non-empty `parameter_list()`) are
243 : // the classifier's responsibility (Family 3 promotes them to
244 : // `kSemanticExecutor`); on the transpiler side we defend in
245 : // depth by bailing to "" if we somehow see one.
246 :
247 1 : TEST_F(TranspilerTest, EmitSubqueryExprScalarFromAnalyzer) {
248 : // `SELECT (SELECT MAX(id) FROM people) AS m FROM people`
249 : // wraps a scalar subquery in the outer SELECT's expr_list. The
250 : // inner subquery has no `parameter_list` (uncorrelated), so the
251 : // emit lowers to `(<inner_sql>)`.
252 1 : const ::googlesql::ResolvedStatement* stmt =
253 1 : Analyze("SELECT (SELECT MAX(id) FROM people) AS m FROM people");
254 1 : ASSERT_NE(stmt, nullptr);
255 1 : TestTranspiler t;
256 1 : std::string sql = t.Transpile(stmt);
257 : // The exact wrap shape changes with the outermost emit, but the
258 : // scalar-subquery emit MUST appear as a parenthesized SELECT
259 : // somewhere in the body. The empty-string contract would mean
260 : // the SubqueryExpr emit silently failed.
261 2 : EXPECT_FALSE(sql.empty()) << "expected non-empty SQL; SubqueryExpr emit "
262 2 : "should not silently fail";
263 2 : EXPECT_NE(sql.find("(SELECT"), std::string::npos)
264 2 : << "expected a parenthesized scalar subquery; got: " << sql;
265 1 : }
266 :
267 1 : TEST_F(TranspilerTest, EmitSubqueryExprExistsFromAnalyzer) {
268 : // `WHERE EXISTS (<sub>)` resolves to a FilterScan whose
269 : // `filter_expr` is a SubqueryExpr of type EXISTS. The emit
270 : // wraps the inner SELECT in `EXISTS (...)`.
271 1 : const ::googlesql::ResolvedStatement* stmt =
272 1 : Analyze("SELECT id FROM people WHERE EXISTS (SELECT 1 FROM orders)");
273 1 : ASSERT_NE(stmt, nullptr);
274 1 : TestTranspiler t;
275 1 : std::string sql = t.Transpile(stmt);
276 2 : ASSERT_FALSE(sql.empty()) << "expected non-empty SQL";
277 2 : EXPECT_NE(sql.find("EXISTS ("), std::string::npos)
278 2 : << "expected EXISTS-prefixed subquery; got: " << sql;
279 1 : }
280 :
281 1 : TEST_F(TranspilerTest, EmitSubqueryExprInFromAnalyzer) {
282 : // `WHERE <lhs> IN (<sub>)` resolves to a FilterScan whose
283 : // `filter_expr` is a SubqueryExpr of type IN with the LHS
284 : // captured in `in_expr`. The emit wraps as `(<lhs> IN (<sub>))`.
285 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
286 1 : "SELECT id FROM people WHERE id IN (SELECT order_id FROM orders)");
287 1 : ASSERT_NE(stmt, nullptr);
288 1 : TestTranspiler t;
289 1 : std::string sql = t.Transpile(stmt);
290 2 : ASSERT_FALSE(sql.empty()) << "expected non-empty SQL";
291 2 : EXPECT_NE(sql.find(" IN ("), std::string::npos)
292 2 : << "expected IN-style emit; got: " << sql;
293 1 : }
294 :
295 1 : TEST_F(TranspilerTest, EmitSubqueryExprArrayFromAnalyzer) {
296 : // `SELECT ARRAY(<sub>)` resolves to a SubqueryExpr of type
297 : // ARRAY. DuckDB's `ARRAY(SELECT ...)` builds a LIST whose
298 : // element order matches the subquery's row order, matching
299 : // BigQuery's contract.
300 1 : const ::googlesql::ResolvedStatement* stmt =
301 1 : Analyze("SELECT ARRAY(SELECT id FROM people) AS ids FROM people");
302 1 : ASSERT_NE(stmt, nullptr);
303 1 : TestTranspiler t;
304 1 : std::string sql = t.Transpile(stmt);
305 2 : ASSERT_FALSE(sql.empty()) << "expected non-empty SQL";
306 2 : EXPECT_NE(sql.find("ARRAY("), std::string::npos)
307 2 : << "expected ARRAY-prefixed subquery; got: " << sql;
308 1 : }
309 :
310 1 : TEST_F(TranspilerTest, EmitSubqueryExprCorrelatedBailsToEmpty) {
311 : // Direct construction so we can build a SubqueryExpr with a
312 : // non-empty `parameter_list` -- the analyzer-side path for
313 : // correlated subqueries gets caught by the route classifier
314 : // upstream (Family 3), so we never reach the transpiler with
315 : // one. The bailout here is defense-in-depth: a future change
316 : // that routes a correlated form through this emit MUST NOT
317 : // silently lower it (the inner ColumnRefs would resolve against
318 : // DuckDB's own evaluation context, not the BigQuery outer-row
319 : // context, producing wrong answers).
320 1 : ::googlesql::ResolvedColumn outer_col(
321 1 : /*column_id=*/1,
322 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("outer"),
323 1 : /*name=*/::googlesql::IdString::MakeGlobal("k"),
324 1 : type_factory_->get_int64());
325 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedColumnRef>> params;
326 1 : params.push_back(::googlesql::MakeResolvedColumnRef(
327 1 : type_factory_->get_int64(), outer_col, /*is_correlated=*/true));
328 1 : auto inner = ::googlesql::MakeResolvedSingleRowScan();
329 1 : auto sub = ::googlesql::MakeResolvedSubqueryExpr(
330 1 : type_factory_->get_int64(),
331 1 : ::googlesql::ResolvedSubqueryExpr::SCALAR,
332 1 : std::move(params),
333 1 : /*in_expr=*/nullptr,
334 1 : std::move(inner));
335 1 : TestTranspiler t;
336 1 : EXPECT_EQ(t.EmitSubqueryExpr(sub.get()), "");
337 1 : }
338 :
339 1 : TEST_F(TranspilerTest, EmitSubqueryExprUnsupportedTypeBailsToEmpty) {
340 : // LIKE ANY / LIKE ALL / NOT LIKE ANY / NOT LIKE ALL stay on the
341 : // empty-string fallback today (deliberately out of scope for the
342 : // transpiler). Pin that
343 : // the default branch returns "" rather than emitting partial
344 : // SQL when a non-{SCALAR, IN, EXISTS, ARRAY} type slips through.
345 1 : auto inner = ::googlesql::MakeResolvedSingleRowScan();
346 1 : auto sub = ::googlesql::MakeResolvedSubqueryExpr(
347 1 : type_factory_->get_bool(),
348 1 : ::googlesql::ResolvedSubqueryExpr::LIKE_ANY,
349 1 : /*parameter_list=*/{},
350 : /*in_expr=*/
351 1 : ::googlesql::MakeResolvedLiteral(::googlesql::Value::String("a")),
352 1 : std::move(inner));
353 1 : TestTranspiler t;
354 1 : EXPECT_EQ(t.EmitSubqueryExpr(sub.get()), "");
355 1 : }
356 :
357 1 : TEST_F(TranspilerTest, EmitWithRefScanEmptyColumnListUsesStar) {
358 : // Degenerate `SELECT * FROM <cte>` shape: the analyzer may
359 : // produce a WithRefScan with an empty column_list when the
360 : // surrounding scan does not project any columns off the ref.
361 : // The emit falls back to `SELECT *` rather than emitting a
362 : // bare `SELECT FROM "p"` which DuckDB would reject.
363 1 : auto ref = ::googlesql::MakeResolvedWithRefScan({}, "p");
364 1 : TestTranspiler t;
365 1 : EXPECT_EQ(t.EmitWithRefScan(ref.get()), "SELECT * FROM \"p\"");
366 1 : }
367 :
368 : } // namespace transpiler
369 : } // namespace duckdb
370 : } // namespace engine
371 : } // namespace backend
372 : } // namespace bigquery_emulator
|