Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
2 :
3 : namespace bigquery_emulator {
4 : namespace backend {
5 : namespace engine {
6 : namespace duckdb {
7 : namespace transpiler {
8 :
9 1 : TEST_F(TranspilerTest, EmitSingleRowScanEmitsSelectOne) {
10 : // `SELECT 1` analyzes to a ResolvedProjectScan over a
11 : // ResolvedSingleRowScan. The single-row scan is the analyzer's
12 : // representation of "no FROM clause" -- a relation with one row
13 : // and no columns. We emit `SELECT 1` so the wrapping ProjectScan
14 : // can splice it into `FROM (<inner>)` like every other scan emit.
15 1 : const ::googlesql::ResolvedStatement* stmt = Analyze("SELECT 1");
16 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
17 1 : ASSERT_NE(scan, nullptr);
18 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_SINGLE_ROW_SCAN);
19 1 : TestTranspiler t;
20 1 : EXPECT_EQ(
21 1 : t.EmitSingleRowScan(scan->GetAs<::googlesql::ResolvedSingleRowScan>()),
22 1 : "SELECT 1");
23 1 : }
24 :
25 1 : TEST_F(TranspilerTest, EmitComputedColumnLiteral) {
26 : // `SELECT 1` lands a ProjectScan whose `expr_list[0]` is a
27 : // ResolvedComputedColumn binding the `1` literal to the
28 : // synthesized output column. EmitComputedColumn lowers the bound
29 : // expression and adds `AS "<column-name>"`. We assert on the
30 : // analyzer's synthesized column name (`$col1`) so any drift in the
31 : // analyzer's auto-aliasing surfaces here rather than downstream.
32 1 : const ::googlesql::ResolvedStatement* stmt = Analyze("SELECT 1");
33 1 : ASSERT_NE(stmt, nullptr);
34 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
35 1 : ASSERT_NE(q, nullptr);
36 1 : const ::googlesql::ResolvedScan* scan = q->query();
37 1 : ASSERT_NE(scan, nullptr);
38 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_PROJECT_SCAN);
39 1 : const auto* project = scan->GetAs<::googlesql::ResolvedProjectScan>();
40 1 : ASSERT_GE(project->expr_list_size(), 1);
41 1 : const ::googlesql::ResolvedComputedColumn* cc = project->expr_list(0);
42 1 : ASSERT_NE(cc, nullptr);
43 1 : TestTranspiler t;
44 1 : EXPECT_EQ(t.EmitComputedColumn(cc), "1 AS \"$col1\"");
45 1 : }
46 :
47 1 : TEST_F(TranspilerTest, EmitComputedColumnFallsBackOnUnloweredExpr) {
48 : // Pick a function whose disposition has no DuckDB lowering so
49 : // its `EmitFunctionCall` returns "" -- the wrapping
50 : // EmitComputedColumn must propagate the empty-string fallback
51 : // contract rather than emit `<unset> AS "<col>"`.
52 : // `BIT_COUNT` is on the `semantic_executor` route in the YAML
53 : // disposition table (BQ flavor differs from DuckDB's `bit_count`).
54 1 : const ::googlesql::ResolvedStatement* stmt =
55 1 : Analyze("SELECT BIT_COUNT(id) FROM people");
56 1 : ASSERT_NE(stmt, nullptr);
57 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
58 1 : ASSERT_NE(q, nullptr);
59 1 : const ::googlesql::ResolvedScan* scan = q->query();
60 1 : ASSERT_NE(scan, nullptr);
61 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_PROJECT_SCAN);
62 1 : const auto* project = scan->GetAs<::googlesql::ResolvedProjectScan>();
63 1 : ASSERT_GE(project->expr_list_size(), 1);
64 1 : const ::googlesql::ResolvedComputedColumn* cc = project->expr_list(0);
65 1 : ASSERT_NE(cc, nullptr);
66 1 : TestTranspiler t;
67 1 : EXPECT_EQ(t.EmitComputedColumn(cc), "");
68 1 : }
69 :
70 1 : TEST_F(TranspilerTest, EmitProjectScanSelectLiteral) {
71 : // The full ProjectScan emit for `SELECT 1` threads:
72 : // * EmitSingleRowScan -> "SELECT 1"
73 : // * EmitComputedColumn -> "1 AS \"$col1\""
74 : // and stitches them as `SELECT <projection> FROM (<inner>)`.
75 1 : const ::googlesql::ResolvedStatement* stmt = Analyze("SELECT 1");
76 1 : ASSERT_NE(stmt, nullptr);
77 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
78 1 : ASSERT_NE(q, nullptr);
79 1 : const ::googlesql::ResolvedScan* scan = q->query();
80 1 : ASSERT_NE(scan, nullptr);
81 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_PROJECT_SCAN);
82 1 : TestTranspiler t;
83 1 : EXPECT_EQ(t.EmitProjectScan(scan->GetAs<::googlesql::ResolvedProjectScan>()),
84 1 : "SELECT 1 AS \"$col1\" FROM (SELECT 1)");
85 1 : }
86 :
87 1 : TEST_F(TranspilerTest, EmitProjectScanElidesNoOpPermutation) {
88 : // For `SELECT name, id FROM people` the analyzer wraps the
89 : // TableScan in a no-op ProjectScan: `expr_list` is empty and
90 : // `column_list` is a permutation of the input scan's column list
91 : // by column id. The emit should drop the wrap and return the inner
92 : // TableScan SQL directly so the outer `EmitQueryStmt`'s projection
93 : // is the only one that does the reordering -- otherwise we stack
94 : // `SELECT "name", "id" FROM (SELECT "id", "name" ...)` redundantly
95 : // on top of the TableScan emit. Same applies to identity-only
96 : // projections (`SELECT id, name FROM people`) and analyzer-pruned
97 : // shapes where `column_list` is a single-column subset of the
98 : // table's columns; both reduce to the inner TableScan SQL.
99 1 : const ::googlesql::ResolvedStatement* stmt =
100 1 : Analyze("SELECT name, id FROM people");
101 1 : ASSERT_NE(stmt, nullptr);
102 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
103 1 : ASSERT_NE(q, nullptr);
104 1 : const ::googlesql::ResolvedScan* scan = q->query();
105 1 : ASSERT_NE(scan, nullptr);
106 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_PROJECT_SCAN);
107 1 : const auto* project = scan->GetAs<::googlesql::ResolvedProjectScan>();
108 1 : ASSERT_EQ(project->expr_list_size(), 0);
109 1 : TestTranspiler t;
110 : // Emit must equal the *input* TableScan's emit, not a wrapping
111 : // SELECT around it.
112 1 : EXPECT_EQ(t.EmitProjectScan(project),
113 1 : "SELECT \"id\", \"name\" FROM \"people\"");
114 1 : }
115 :
116 1 : TEST_F(TranspilerTest, EmitOutputColumnCollapsesAliasWhenNamesMatch) {
117 : // For `SELECT 1` the output column's user-visible name and the
118 : // physical column's name both resolve to `$col1`, so the alias
119 : // collapses to just `"$col1"` -- DuckDB carries the column name
120 : // straight through the outermost SELECT.
121 1 : const ::googlesql::ResolvedStatement* stmt = Analyze("SELECT 1");
122 1 : ASSERT_NE(stmt, nullptr);
123 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
124 1 : ASSERT_NE(q, nullptr);
125 1 : ASSERT_EQ(q->output_column_list_size(), 1);
126 1 : TestTranspiler t;
127 1 : EXPECT_EQ(t.EmitOutputColumn(q->output_column_list(0)), "\"$col1\"");
128 1 : }
129 :
130 1 : TEST_F(TranspilerTest, EmitOutputColumnEmitsAliasWhenNamesDiffer) {
131 : // `SELECT id AS user_id FROM people` lands an output column whose
132 : // user-visible name (`user_id`) differs from the physical column
133 : // name (`id`); the emit must surface both as `"id" AS "user_id"`.
134 1 : const ::googlesql::ResolvedStatement* stmt =
135 1 : Analyze("SELECT id AS user_id FROM people");
136 1 : ASSERT_NE(stmt, nullptr);
137 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
138 1 : ASSERT_NE(q, nullptr);
139 1 : ASSERT_EQ(q->output_column_list_size(), 1);
140 1 : TestTranspiler t;
141 1 : EXPECT_EQ(t.EmitOutputColumn(q->output_column_list(0)),
142 1 : "\"id\" AS \"user_id\"");
143 1 : }
144 :
145 1 : TEST_F(TranspilerTest, EmitQueryStmtSelectLiteral) {
146 : // End-to-end for `SELECT 1`: the analyzer wraps a ProjectScan
147 : // around a SingleRowScan and the QueryStmt's output_column_list
148 : // carries the synthesized `$col1` alias. The emit wires
149 : // EmitProjectScan + EmitOutputColumn into the final SQL.
150 1 : const ::googlesql::ResolvedStatement* stmt = Analyze("SELECT 1");
151 1 : ASSERT_NE(stmt, nullptr);
152 1 : ASSERT_EQ(stmt->node_kind(), ::googlesql::RESOLVED_QUERY_STMT);
153 1 : TestTranspiler t;
154 1 : EXPECT_EQ(t.EmitQueryStmt(stmt->GetAs<::googlesql::ResolvedQueryStmt>()),
155 1 : "SELECT \"$col1\" FROM (SELECT 1 AS \"$col1\" FROM (SELECT 1))");
156 1 : }
157 :
158 1 : TEST_F(TranspilerTest, EmitQueryStmtSelectLiteralWithExplicitAlias) {
159 : // `SELECT 1 AS x` rebinds the synthesized column id to the
160 : // user-spelled alias; both `output_column_list[0].name()` and the
161 : // column's `name()` resolve to `x`, so the AS alias collapses on
162 : // the outermost SELECT.
163 1 : const ::googlesql::ResolvedStatement* stmt = Analyze("SELECT 1 AS x");
164 1 : ASSERT_NE(stmt, nullptr);
165 1 : ASSERT_EQ(stmt->node_kind(), ::googlesql::RESOLVED_QUERY_STMT);
166 1 : TestTranspiler t;
167 1 : EXPECT_EQ(t.EmitQueryStmt(stmt->GetAs<::googlesql::ResolvedQueryStmt>()),
168 1 : "SELECT \"x\" FROM (SELECT 1 AS \"x\" FROM (SELECT 1))");
169 1 : }
170 :
171 1 : TEST_F(TranspilerTest, EmitQueryStmtTableProjectionPreservesColumnOrder) {
172 : // `SELECT id, name FROM people` should round-trip with both
173 : // columns in their declared order. The analyzer collapses this
174 : // straight onto the TableScan (no wrapping ProjectScan because
175 : // the projection matches the table's column list 1:1) and the
176 : // QueryStmt mapping just renames each column to itself.
177 1 : const ::googlesql::ResolvedStatement* stmt =
178 1 : Analyze("SELECT id, name FROM people");
179 1 : ASSERT_NE(stmt, nullptr);
180 1 : ASSERT_EQ(stmt->node_kind(), ::googlesql::RESOLVED_QUERY_STMT);
181 1 : TestTranspiler t;
182 1 : EXPECT_EQ(t.EmitQueryStmt(stmt->GetAs<::googlesql::ResolvedQueryStmt>()),
183 1 : "SELECT \"id\", \"name\" FROM (SELECT \"id\", \"name\" "
184 1 : "FROM \"people\")");
185 1 : }
186 :
187 1 : TEST_F(TranspilerTest, EmitQueryStmtExpressionProjection) {
188 : // A non-trivial projection (`COALESCE(name, 'unknown') AS n`)
189 : // forces the analyzer to wrap the TableScan in a ProjectScan
190 : // whose `expr_list` carries the ComputedColumn binding. The
191 : // outermost SELECT then projects the synthesized column under the
192 : // user-spelled alias.
193 1 : const ::googlesql::ResolvedStatement* stmt =
194 1 : Analyze("SELECT COALESCE(name, 'unknown') AS n FROM people");
195 1 : ASSERT_NE(stmt, nullptr);
196 1 : ASSERT_EQ(stmt->node_kind(), ::googlesql::RESOLVED_QUERY_STMT);
197 1 : TestTranspiler t;
198 1 : EXPECT_EQ(t.EmitQueryStmt(stmt->GetAs<::googlesql::ResolvedQueryStmt>()),
199 1 : "SELECT \"n\" FROM (SELECT COALESCE(\"name\", 'unknown') AS "
200 1 : "\"n\" FROM (SELECT \"id\", \"name\" FROM \"people\"))");
201 1 : }
202 :
203 1 : TEST_F(TranspilerTest, EmitQueryStmtReorderedOutputColumns) {
204 : // `SELECT name, id FROM people` reorders the table's column list.
205 : // The analyzer wraps the TableScan in a ProjectScan that mirrors
206 : // the table's storage order in `column_list` but the QueryStmt's
207 : // `output_column_list` reflects the user-spelled order, so the
208 : // outermost SELECT projects `name` before `id`.
209 1 : const ::googlesql::ResolvedStatement* stmt =
210 1 : Analyze("SELECT name, id FROM people");
211 1 : ASSERT_NE(stmt, nullptr);
212 1 : ASSERT_EQ(stmt->node_kind(), ::googlesql::RESOLVED_QUERY_STMT);
213 1 : TestTranspiler t;
214 1 : EXPECT_EQ(t.EmitQueryStmt(stmt->GetAs<::googlesql::ResolvedQueryStmt>()),
215 1 : "SELECT \"name\", \"id\" FROM (SELECT \"id\", \"name\" "
216 1 : "FROM \"people\")");
217 1 : }
218 :
219 1 : TEST_F(TranspilerTest, EmitQueryStmtAliasedColumnSurfacesAlias) {
220 : // `SELECT id AS user_id FROM people` keeps the physical column as
221 : // `id` inside the inner scan but renames it to `user_id` on the
222 : // outermost SELECT. The projection carries `<col> AS <alias>` so
223 : // the wire-side schema matches the user's spelling. The analyzer
224 : // does not prune the underlying TableScan's column_list down to
225 : // `[id]` here -- it keeps the full `[id, name]` table column list
226 : // and lets the wrapping ProjectScan narrow to `[id]`. The
227 : // EmitProjectScan no-op elision deliberately skips narrowing
228 : // layers (`column_list` is a strict subset of input, sizes
229 : // differ), so the wrap that drops `name` survives and the inner
230 : // emit shows three nested SELECTs.
231 1 : const ::googlesql::ResolvedStatement* stmt =
232 1 : Analyze("SELECT id AS user_id FROM people");
233 1 : ASSERT_NE(stmt, nullptr);
234 1 : ASSERT_EQ(stmt->node_kind(), ::googlesql::RESOLVED_QUERY_STMT);
235 1 : TestTranspiler t;
236 1 : EXPECT_EQ(t.EmitQueryStmt(stmt->GetAs<::googlesql::ResolvedQueryStmt>()),
237 1 : "SELECT \"id\" AS \"user_id\" FROM (SELECT \"id\" "
238 1 : "FROM (SELECT \"id\", \"name\" FROM \"people\"))");
239 1 : }
240 :
241 1 : TEST_F(TranspilerTest, EmitQueryStmtFallsBackOnUnloweredProjection) {
242 : // `BIT_COUNT(id)` is on the `semantic_executor` route in the YAML
243 : // disposition table; the inner ProjectScan emit returns "" and
244 : // EmitQueryStmt propagates the empty-string fallback contract
245 : // instead of stitching an outer SELECT around a missing inner
246 : // relation.
247 1 : const ::googlesql::ResolvedStatement* stmt =
248 1 : Analyze("SELECT BIT_COUNT(id) FROM people");
249 1 : ASSERT_NE(stmt, nullptr);
250 1 : ASSERT_EQ(stmt->node_kind(), ::googlesql::RESOLVED_QUERY_STMT);
251 1 : TestTranspiler t;
252 1 : EXPECT_EQ(t.EmitQueryStmt(stmt->GetAs<::googlesql::ResolvedQueryStmt>()), "");
253 1 : }
254 :
255 : // --- Parameters ---------------------------------------------------------
256 :
257 : } // namespace transpiler
258 : } // namespace duckdb
259 : } // namespace engine
260 : } // namespace backend
261 : } // namespace bigquery_emulator
|