Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
2 :
3 : namespace bigquery_emulator {
4 : namespace backend {
5 : namespace engine {
6 : namespace duckdb {
7 : namespace transpiler {
8 :
9 1 : TEST_F(TranspilerTest, EmitTableScanEmitsSelectStar) {
10 : // `SELECT * FROM people` collapses (after rewrites) onto a
11 : // ResolvedTableScan whose `column_list` carries every column on
12 : // the underlying table. We assert on both the select-list shape
13 : // (one quoted identifier per column, in catalog order) and the
14 : // bare table-name reference -- the engine ATTACHes the storage's
15 : // backing files under that name at execute time.
16 1 : const ::googlesql::ResolvedStatement* stmt = Analyze("SELECT * FROM people");
17 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
18 1 : ASSERT_NE(scan, nullptr);
19 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_TABLE_SCAN);
20 1 : TestTranspiler t;
21 1 : EXPECT_EQ(t.EmitTableScan(scan->GetAs<::googlesql::ResolvedTableScan>()),
22 1 : "SELECT \"id\", \"name\" FROM \"people\"");
23 1 : }
24 :
25 1 : TEST_F(TranspilerTest, EmitFilterScanWrapsInputScanWithWhere) {
26 : // `WHERE id > 0` lands as a ResolvedFilterScan around a
27 : // ResolvedTableScan. The emit composes the table scan's
28 : // self-contained SELECT as a derived table so the WHERE clause
29 : // sees the same column aliases the inner SELECT exposes.
30 1 : const ::googlesql::ResolvedStatement* stmt =
31 1 : Analyze("SELECT id FROM people WHERE id > 0");
32 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
33 1 : ASSERT_NE(scan, nullptr);
34 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_FILTER_SCAN);
35 1 : TestTranspiler t;
36 1 : EXPECT_EQ(t.EmitFilterScan(scan->GetAs<::googlesql::ResolvedFilterScan>()),
37 1 : "SELECT * FROM (SELECT \"id\", \"name\" FROM \"people\") WHERE "
38 1 : "(\"id\" > 0)");
39 1 : }
40 :
41 1 : TEST_F(TranspilerTest, EmitFilterScanWithCoalescePredicateLowers) {
42 : // Picking a predicate that the function-call emit *does* know
43 : // about (`COALESCE`) lets us exercise the actual FilterScan SQL
44 : // composition. COALESCE(name, 'x') = 'x' isn't a particularly
45 : // useful predicate, but it threads two literals, a column ref,
46 : // and the COALESCE emit through the same SQL string -- exactly
47 : // the integration the FilterScan emit needs to keep honest.
48 1 : const ::googlesql::ResolvedStatement* stmt =
49 1 : Analyze("SELECT id FROM people WHERE COALESCE(name, 'x') = 'x'");
50 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
51 1 : ASSERT_NE(scan, nullptr);
52 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_FILTER_SCAN);
53 1 : TestTranspiler t;
54 1 : EXPECT_EQ(t.EmitFilterScan(scan->GetAs<::googlesql::ResolvedFilterScan>()),
55 1 : "SELECT * FROM (SELECT \"id\", \"name\" FROM \"people\") WHERE "
56 1 : "(COALESCE(\"name\", 'x') = 'x')");
57 1 : }
58 :
59 : // --- Join ---------------------------------------------------------------
60 :
61 1 : TEST_F(TranspilerTest, EmitJoinScanCrossJoinTwoUnnests) {
62 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
63 1 : "SELECT n, m FROM UNNEST(GENERATE_ARRAY(1, 10)) AS n "
64 1 : "CROSS JOIN UNNEST(GENERATE_ARRAY(1, 2)) AS m");
65 1 : ASSERT_NE(stmt, nullptr);
66 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
67 1 : ASSERT_NE(scan, nullptr);
68 : // Explicit CROSS JOIN of standalone UNNESTs chains into nested
69 : // single-array ResolvedArrayScan nodes (outer over inner).
70 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_ARRAY_SCAN);
71 1 : const auto* arr = scan->GetAs<::googlesql::ResolvedArrayScan>();
72 1 : ASSERT_NE(arr, nullptr);
73 1 : ASSERT_EQ(arr->array_expr_list_size(), 1);
74 1 : ASSERT_NE(arr->input_scan(), nullptr);
75 1 : ASSERT_EQ(arr->input_scan()->node_kind(), ::googlesql::RESOLVED_ARRAY_SCAN);
76 1 : TestTranspiler t;
77 1 : std::string sql = t.EmitArrayScan(arr);
78 2 : ASSERT_FALSE(sql.empty()) << "multi-array cross unnest must emit";
79 2 : EXPECT_NE(sql.find("CROSS JOIN"), std::string::npos) << sql;
80 2 : EXPECT_NE(sql.find("unnest("), std::string::npos) << sql;
81 1 : }
82 :
83 1 : TEST_F(TranspilerTest, EmitJoinScanCrossJoinFromImplicit) {
84 : // `FROM people, orders` analyzes to a ResolvedJoinScan with
85 : // INNER + null `join_expr`. The emit lowers it to DuckDB's
86 : // explicit CROSS JOIN so a downstream FilterScan / ProjectScan
87 : // can wrap it without having to know about the implicit-join
88 : // shorthand.
89 1 : const ::googlesql::ResolvedStatement* stmt =
90 1 : Analyze("SELECT id FROM people, orders");
91 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
92 1 : ASSERT_NE(scan, nullptr);
93 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_JOIN_SCAN);
94 1 : TestTranspiler t;
95 1 : std::string sql =
96 1 : t.EmitJoinScan(scan->GetAs<::googlesql::ResolvedJoinScan>());
97 2 : EXPECT_NE(sql.find("CROSS JOIN"), std::string::npos) << sql;
98 2 : EXPECT_NE(sql.find("__bq_j_"), std::string::npos) << sql;
99 1 : }
100 :
101 1 : TEST_F(TranspilerTest, EmitJoinScanInnerWithLiteralPredicate) {
102 : // `ON TRUE` keeps the test focused on the join emit shape: the
103 : // predicate lowers cleanly through `EmitLiteral` so the assertion
104 : // can pin the full INNER JOIN SQL string. (`ON x = y` would
105 : // route through the `=` function call which isn't on the
106 : // function-call whitelist for the scan emit; that path is covered
107 : // by the propagation test below.)
108 1 : const ::googlesql::ResolvedStatement* stmt =
109 1 : Analyze("SELECT id FROM people INNER JOIN orders ON TRUE");
110 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
111 1 : ASSERT_NE(scan, nullptr);
112 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_JOIN_SCAN);
113 1 : TestTranspiler t;
114 1 : std::string sql =
115 1 : t.EmitJoinScan(scan->GetAs<::googlesql::ResolvedJoinScan>());
116 2 : EXPECT_NE(sql.find("INNER JOIN"), std::string::npos) << sql;
117 2 : EXPECT_NE(sql.find("__bq_j_"), std::string::npos) << sql;
118 2 : EXPECT_NE(sql.find(" ON true"), std::string::npos) << sql;
119 1 : }
120 :
121 1 : TEST_F(TranspilerTest, EmitJoinScanLeftWithLiteralPredicate) {
122 : // LEFT JOIN requires a non-null `join_expr`; `ON TRUE` is the
123 : // smallest predicate that round-trips through the emit. The
124 : // assertion confirms the keyword swap (INNER -> LEFT) is the
125 : // only difference vs. the inner test above.
126 1 : const ::googlesql::ResolvedStatement* stmt =
127 1 : Analyze("SELECT id FROM people LEFT JOIN orders ON TRUE");
128 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
129 1 : ASSERT_NE(scan, nullptr);
130 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_JOIN_SCAN);
131 1 : TestTranspiler t;
132 1 : std::string sql =
133 1 : t.EmitJoinScan(scan->GetAs<::googlesql::ResolvedJoinScan>());
134 2 : EXPECT_NE(sql.find("LEFT JOIN"), std::string::npos) << sql;
135 2 : EXPECT_NE(sql.find("__bq_j_"), std::string::npos) << sql;
136 1 : }
137 :
138 1 : TEST_F(TranspilerTest, EmitJoinScanInnerOnEqualPredicate) {
139 1 : const ::googlesql::ResolvedStatement* stmt =
140 1 : Analyze("SELECT id FROM people INNER JOIN orders ON id = order_id");
141 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
142 1 : ASSERT_NE(scan, nullptr);
143 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_JOIN_SCAN);
144 1 : TestTranspiler t;
145 1 : std::string sql =
146 1 : t.EmitJoinScan(scan->GetAs<::googlesql::ResolvedJoinScan>());
147 2 : EXPECT_NE(sql.find("INNER JOIN"), std::string::npos) << sql;
148 2 : EXPECT_NE(sql.find("__bq_j_"), std::string::npos) << sql;
149 2 : EXPECT_NE(sql.find("__bq_l.\"id\" = __bq_r.\"order_id\""), std::string::npos)
150 2 : << sql;
151 1 : }
152 :
153 1 : TEST_F(TranspilerTest, EmitJoinScanLeftOnCoalesceEquality) {
154 : // bigframes cache joins use COALESCE-sentinel equality predicates in
155 : // LEFT OUTER JOIN ON clauses for row-ordering keys.
156 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
157 1 : "SELECT a.level_0, b.bfuid_col_2 FROM "
158 1 : "(SELECT 0 AS level_0, 'John' AS column_0) a "
159 1 : "LEFT OUTER JOIN "
160 1 : "(SELECT 0 AS bfuid_col_1, 'group_1' AS bfuid_col_2) b "
161 1 : "ON COALESCE(a.level_0, 0) = COALESCE(b.bfuid_col_1, 0) "
162 1 : "AND COALESCE(a.level_0, 1) = COALESCE(b.bfuid_col_1, 1)");
163 1 : ASSERT_NE(stmt, nullptr);
164 1 : TestTranspiler t;
165 1 : std::string sql = t.Transpile(stmt);
166 2 : ASSERT_FALSE(sql.empty()) << "COALESCE join ON must transpile";
167 2 : EXPECT_NE(sql.find("COALESCE"), std::string::npos) << sql;
168 2 : EXPECT_NE(sql.find("LEFT JOIN"), std::string::npos) << sql;
169 1 : }
170 :
171 1 : TEST_F(TranspilerTest, EmitJoinScanInnerUsingSingleColumn) {
172 : // `USING (id)` canonicalizes to `$equal` in `join_expr`, but the
173 : // emit peels the column name back out and emits DuckDB's native
174 : // `USING (...)` so we do not need `$equal` on the disposition table.
175 1 : const ::googlesql::ResolvedStatement* stmt =
176 1 : Analyze("SELECT p.id FROM people p INNER JOIN people p2 USING (id)");
177 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
178 1 : ASSERT_NE(scan, nullptr);
179 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_JOIN_SCAN);
180 1 : const auto* join = scan->GetAs<::googlesql::ResolvedJoinScan>();
181 1 : ASSERT_TRUE(join->has_using());
182 1 : TestTranspiler t;
183 1 : std::string sql = t.EmitJoinScan(join);
184 2 : EXPECT_NE(sql.find("INNER JOIN"), std::string::npos) << sql;
185 2 : EXPECT_NE(sql.find("USING (\"id\")"), std::string::npos) << sql;
186 2 : EXPECT_EQ(sql.find(" ON "), std::string::npos) << sql;
187 1 : }
188 :
189 : // --- Aggregate ----------------------------------------------------------
190 :
191 : } // namespace transpiler
192 : } // namespace duckdb
193 : } // namespace engine
194 : } // namespace backend
195 : } // namespace bigquery_emulator
|