Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_internal.h"
2 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
3 : #include "googlesql/public/types/struct_type.h"
4 :
5 : namespace bigquery_emulator {
6 : namespace backend {
7 : namespace engine {
8 : namespace duckdb {
9 : namespace transpiler {
10 :
11 1 : TEST_F(TranspilerTest, EmitParameterNamed) {
12 : // `SELECT @customer_id` analyzes to a ProjectScan whose only
13 : // computed column is a `ResolvedParameter` carrying the
14 : // analyzer-lowercased name (`customer_id`). We assert on both the
15 : // emitted `$N` placeholder and the bind-order accumulator so a
16 : // regression in either side surfaces here rather than downstream
17 : // in the engine integration.
18 1 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
19 1 : ASSERT_TRUE(
20 1 : options.AddQueryParameter("customer_id", type_factory_->get_int64())
21 1 : .ok());
22 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
23 1 : stmt = AnalyzeWith("SELECT @customer_id AS x", options);
24 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
25 1 : ASSERT_NE(expr, nullptr);
26 1 : ASSERT_EQ(expr->node_kind(), ::googlesql::RESOLVED_PARAMETER);
27 1 : TestTranspiler t;
28 1 : EXPECT_EQ(t.EmitParameter(expr->GetAs<::googlesql::ResolvedParameter>()),
29 1 : "$1");
30 1 : ASSERT_EQ(t.parameter_order().size(), 1u);
31 1 : EXPECT_EQ(t.parameter_order()[0].name, "customer_id");
32 1 : EXPECT_EQ(t.parameter_order()[0].position, 0);
33 1 : }
34 :
35 1 : TEST_F(TranspilerTest, EmitParameterReuseSharesSlot) {
36 : // Two textual references to the same named parameter must share a
37 : // single DuckDB `$N` slot so the engine binds one value, not two.
38 : // We hit `EmitParameter` twice on the same (or equivalent) node and
39 : // assert both emits go to `$1` and the bind-order accumulator
40 : // carries exactly one entry.
41 1 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
42 1 : ASSERT_TRUE(
43 1 : options.AddQueryParameter("threshold", type_factory_->get_int64()).ok());
44 : // Use the parameter twice in distinct projections: GoogleSQL
45 : // produces two `ResolvedParameter` nodes (one per reference) but
46 : // both carry the same `name()`, so the dedup collapses them.
47 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
48 1 : stmt = AnalyzeWith("SELECT @threshold AS a, @threshold AS b", options);
49 1 : ASSERT_NE(stmt, nullptr);
50 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
51 1 : ASSERT_NE(q, nullptr);
52 1 : const ::googlesql::ResolvedScan* scan = q->query();
53 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_PROJECT_SCAN);
54 1 : const auto* project = scan->GetAs<::googlesql::ResolvedProjectScan>();
55 1 : ASSERT_GE(project->expr_list_size(), 2);
56 1 : TestTranspiler t;
57 1 : EXPECT_EQ(t.EmitParameter(project->expr_list(0)
58 1 : ->expr()
59 1 : ->GetAs<::googlesql::ResolvedParameter>()),
60 1 : "$1");
61 1 : EXPECT_EQ(t.EmitParameter(project->expr_list(1)
62 1 : ->expr()
63 1 : ->GetAs<::googlesql::ResolvedParameter>()),
64 1 : "$1");
65 1 : ASSERT_EQ(t.parameter_order().size(), 1u);
66 1 : EXPECT_EQ(t.parameter_order()[0].name, "threshold");
67 1 : }
68 :
69 1 : TEST_F(TranspilerTest, EmitParameterPositionalAssignsFreshSlots) {
70 : // Positional parameters carry a 1-based `position()` and are
71 : // referentially distinct on every analyzer reference; we never
72 : // dedupe them. Two positional references emit `$1` then `$2` and
73 : // the bind-order accumulator records both with the analyzer
74 : // positions intact.
75 1 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
76 1 : options.set_parameter_mode(::googlesql::PARAMETER_POSITIONAL);
77 1 : ASSERT_TRUE(
78 1 : options.AddPositionalQueryParameter(type_factory_->get_int64()).ok());
79 1 : ASSERT_TRUE(
80 1 : options.AddPositionalQueryParameter(type_factory_->get_string()).ok());
81 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
82 1 : stmt = AnalyzeWith("SELECT ? AS a, ? AS b", options);
83 1 : ASSERT_NE(stmt, nullptr);
84 1 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
85 1 : ASSERT_NE(q, nullptr);
86 1 : const ::googlesql::ResolvedScan* scan = q->query();
87 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_PROJECT_SCAN);
88 1 : const auto* project = scan->GetAs<::googlesql::ResolvedProjectScan>();
89 1 : ASSERT_GE(project->expr_list_size(), 2);
90 1 : TestTranspiler t;
91 1 : EXPECT_EQ(t.EmitParameter(project->expr_list(0)
92 1 : ->expr()
93 1 : ->GetAs<::googlesql::ResolvedParameter>()),
94 1 : "$1");
95 1 : EXPECT_EQ(t.EmitParameter(project->expr_list(1)
96 1 : ->expr()
97 1 : ->GetAs<::googlesql::ResolvedParameter>()),
98 1 : "$2");
99 1 : ASSERT_EQ(t.parameter_order().size(), 2u);
100 1 : EXPECT_TRUE(t.parameter_order()[0].name.empty());
101 1 : EXPECT_EQ(t.parameter_order()[0].position, 1);
102 1 : EXPECT_TRUE(t.parameter_order()[1].name.empty());
103 1 : EXPECT_EQ(t.parameter_order()[1].position, 2);
104 1 : }
105 :
106 1 : TEST_F(TranspilerTest, EmitLimitOffsetScanWithNamedParameter) {
107 : // `LIMIT @n OFFSET @n` exercises the parameter-in-LIMIT path *and*
108 : // named-parameter dedup inside a single scan emit: both LIMIT and
109 : // OFFSET resolve `@n` to `$1` and the accumulator records one
110 : // entry.
111 1 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
112 1 : ASSERT_TRUE(options.AddQueryParameter("n", type_factory_->get_int64()).ok());
113 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
114 1 : stmt = AnalyzeWith("SELECT * FROM people ORDER BY id LIMIT @n OFFSET @n",
115 1 : options);
116 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
117 1 : ASSERT_NE(scan, nullptr);
118 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_LIMIT_OFFSET_SCAN);
119 1 : TestTranspiler t;
120 1 : EXPECT_EQ(t.EmitLimitOffsetScan(
121 1 : scan->GetAs<::googlesql::ResolvedLimitOffsetScan>()),
122 1 : "SELECT * FROM (SELECT * FROM (SELECT \"id\", \"name\" FROM "
123 1 : "\"people\") ORDER BY \"id\" ASC NULLS FIRST) LIMIT $1 OFFSET $1");
124 1 : ASSERT_EQ(t.parameter_order().size(), 1u);
125 1 : EXPECT_EQ(t.parameter_order()[0].name, "n");
126 1 : }
127 :
128 1 : TEST_F(TranspilerTest, EmitParameterInsideFunctionArgument) {
129 : // Parameters thread through `EmitFunctionCall`'s argument loop
130 : // exactly like any other expression: `IFNULL(@s, 'x')` lowers to
131 : // `IFNULL($1, 'x')` and the parameter accumulator records the
132 : // single `@s` slot. `IFNULL` is on the function disposition table
133 : // so the surrounding emit composes fully.
134 1 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
135 1 : ASSERT_TRUE(options.AddQueryParameter("s", type_factory_->get_string()).ok());
136 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
137 1 : stmt = AnalyzeWith("SELECT IFNULL(@s, 'x') FROM people", options);
138 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
139 1 : ASSERT_NE(expr, nullptr);
140 1 : ASSERT_EQ(expr->node_kind(), ::googlesql::RESOLVED_FUNCTION_CALL);
141 1 : TestTranspiler t;
142 1 : EXPECT_EQ(
143 1 : t.EmitFunctionCall(expr->GetAs<::googlesql::ResolvedFunctionCall>()),
144 1 : "IFNULL($1, 'x')");
145 1 : ASSERT_EQ(t.parameter_order().size(), 1u);
146 1 : EXPECT_EQ(t.parameter_order()[0].name, "s");
147 1 : }
148 :
149 : // --- Cast ---------------------------------------------------------------
150 :
151 1 : TEST_F(TranspilerTest, EmitCastInt64ToString) {
152 : // `CAST(id AS STRING)` produces a `ResolvedCast` whose `expr` is
153 : // the column ref and whose target `Type` is STRING. The emit
154 : // composes both via `EmitColumnRef` + `ToDuckDBSqlType`, so the
155 : // result threads quoted-identifier and DuckDB type-name conventions
156 : // together.
157 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
158 1 : stmt = Analyze("SELECT CAST(id AS STRING) FROM people");
159 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
160 1 : ASSERT_NE(expr, nullptr);
161 1 : ASSERT_EQ(expr->node_kind(), ::googlesql::RESOLVED_CAST);
162 1 : TestTranspiler t;
163 1 : EXPECT_EQ(t.EmitCast(expr->GetAs<::googlesql::ResolvedCast>()),
164 1 : "CAST(\"id\" AS VARCHAR)");
165 1 : }
166 :
167 1 : TEST_F(TranspilerTest, EmitCastStringToInt64) {
168 : // CAST against a column ref of the right source type lands on the
169 : // expected DuckDB `BIGINT` (BQ INT64 -> DuckDB BIGINT, see
170 : // `types.cc`). The shape is symmetrical to the int->string case
171 : // above and pins the type-name mapping for INT64.
172 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
173 1 : stmt = Analyze("SELECT CAST(name AS INT64) FROM people");
174 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
175 1 : ASSERT_NE(expr, nullptr);
176 1 : ASSERT_EQ(expr->node_kind(), ::googlesql::RESOLVED_CAST);
177 1 : TestTranspiler t;
178 1 : EXPECT_EQ(t.EmitCast(expr->GetAs<::googlesql::ResolvedCast>()),
179 1 : "CAST(\"name\" AS BIGINT)");
180 1 : }
181 :
182 1 : TEST_F(TranspilerTest, EmitSafeCastUsesTryCast) {
183 : // `SAFE_CAST(<expr> AS T)` sets `return_null_on_error()` on the
184 : // ResolvedCast; we lower it to DuckDB's `TRY_CAST(...)` which
185 : // matches BigQuery's "return NULL on conversion failure" contract.
186 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
187 1 : stmt = Analyze("SELECT SAFE_CAST(name AS INT64) FROM people");
188 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
189 1 : ASSERT_NE(expr, nullptr);
190 1 : ASSERT_EQ(expr->node_kind(), ::googlesql::RESOLVED_CAST);
191 1 : TestTranspiler t;
192 1 : EXPECT_EQ(t.EmitCast(expr->GetAs<::googlesql::ResolvedCast>()),
193 1 : "TRY_CAST(\"name\" AS BIGINT)");
194 1 : }
195 :
196 1 : TEST_F(TranspilerTest, EmitCastNestedInsideFunctionCall) {
197 : // CAST nested inside another function call exercises the dispatch
198 : // path: `EmitFunctionCall` calls `EmitExpr` per argument, which
199 : // routes the cast through `EmitCast`. The full lower stays on the
200 : // DuckDB path because both COALESCE (disposition table) and CAST
201 : // (whitelisted target) are first-class.
202 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
203 1 : stmt = Analyze("SELECT COALESCE(CAST(id AS STRING), 'x') FROM people");
204 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
205 1 : ASSERT_NE(expr, nullptr);
206 1 : ASSERT_EQ(expr->node_kind(), ::googlesql::RESOLVED_FUNCTION_CALL);
207 1 : TestTranspiler t;
208 1 : EXPECT_EQ(
209 1 : t.EmitFunctionCall(expr->GetAs<::googlesql::ResolvedFunctionCall>()),
210 1 : "COALESCE(CAST(\"id\" AS VARCHAR), 'x')");
211 1 : }
212 :
213 1 : TEST_F(TranspilerTest, EmitCastStructPositionalRemap) {
214 1 : ::googlesql::TypeFactory factory;
215 1 : const ::googlesql::Type* int64 = factory.get_int64();
216 1 : const ::googlesql::Type* str = factory.get_string();
217 1 : std::vector<::googlesql::StructField> src_fields;
218 1 : src_fields.emplace_back("", int64);
219 1 : src_fields.emplace_back("", str);
220 1 : const ::googlesql::StructType* source_st = nullptr;
221 1 : ASSERT_TRUE(factory.MakeStructType(src_fields, &source_st).ok());
222 1 : std::vector<::googlesql::StructField> tgt_fields;
223 1 : tgt_fields.emplace_back("x", int64);
224 1 : tgt_fields.emplace_back("y", str);
225 1 : const ::googlesql::StructType* target_st = nullptr;
226 1 : ASSERT_TRUE(factory.MakeStructType(tgt_fields, &target_st).ok());
227 1 : EXPECT_EQ(internal::EmitStructPositionalCastRemap(
228 1 : "{ '_0': 1, '_1': 'a' }", *source_st, *target_st),
229 1 : "{'x': ({ '_0': 1, '_1': 'a' }).\"_0\", 'y': ({ '_0': 1, '_1': 'a' "
230 1 : "}).\"_1\"}");
231 1 : }
232 :
233 1 : TEST_F(TranspilerTest, EmitCastArrayThreadsThroughColumnRef) {
234 : // ARRAY casts thread `ToDuckDBSqlType`'s recursive type expansion;
235 : // ARRAY<STRING> -> VARCHAR[] mirrors DuckDB's native list-of
236 : // syntax. We wrap a non-const expression (`[id]`) inside the cast
237 : // so the analyzer cannot constant-fold the whole expression onto
238 : // a `ResolvedLiteral` -- a folded array-of-int64 would skip the
239 : // ResolvedCast entirely and the test would fail with "expected
240 : // RESOLVED_CAST, got RESOLVED_LITERAL".
241 1 : const ::googlesql::ResolvedStatement* stmt = nullptr;
242 1 : stmt = Analyze("SELECT CAST([id] AS ARRAY<STRING>) FROM people");
243 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
244 1 : ASSERT_NE(expr, nullptr);
245 1 : ASSERT_EQ(expr->node_kind(), ::googlesql::RESOLVED_CAST);
246 1 : TestTranspiler t;
247 : // `[id]` is a non-const ARRAY constructor; it lowers through
248 : // `$make_array` to DuckDB's bracket syntax.
249 1 : EXPECT_EQ(t.EmitCast(expr->GetAs<::googlesql::ResolvedCast>()),
250 1 : "CAST([\"id\"] AS VARCHAR[])");
251 1 : }
252 :
253 : } // namespace transpiler
254 : } // namespace duckdb
255 : } // namespace engine
256 : } // namespace backend
257 : } // namespace bigquery_emulator
|