Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
2 :
3 : namespace bigquery_emulator {
4 : namespace backend {
5 : namespace engine {
6 : namespace duckdb {
7 : namespace transpiler {
8 :
9 1 : TEST_F(TranspilerTest, EmitWithExprSingleBindingFromAnalyzer) {
10 : // `WITH(<assigns>, <body>)` is hard to keep alive against
11 : // analyzer constant-folding when both sides are constant, so we
12 : // thread a column ref through a function call (`IFNULL(name,
13 : // 'x')`) for the binding and reuse the binding column twice in
14 : // the body via an outer `IFNULL`. That keeps the binding
15 : // necessary -- inlining would evaluate the
16 : // `IFNULL(name, 'x')` twice, which would break the semantic
17 : // contract the WithExpr exists to preserve.
18 1 : const ::googlesql::ResolvedStatement* stmt =
19 1 : Analyze("SELECT WITH(a AS IFNULL(name, 'x'), IFNULL(a, a)) FROM people");
20 1 : if (stmt == nullptr) {
21 0 : GTEST_SKIP() << "analyzer rejected WITH(...) expression -- skip";
22 0 : }
23 1 : const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
24 1 : if (expr == nullptr || expr->node_kind() != ::googlesql::RESOLVED_WITH_EXPR) {
25 1 : GTEST_SKIP() << "WITH(...) lowered to a non-WithExpr shape -- skip";
26 1 : }
27 0 : TestTranspiler t;
28 0 : EXPECT_EQ(t.EmitWithExpr(expr->GetAs<::googlesql::ResolvedWithExpr>()),
29 0 : "(SELECT IFNULL(\"a\", \"a\") FROM (SELECT IFNULL(\"name\", 'x') "
30 0 : "AS \"a\"))");
31 0 : }
32 :
33 1 : TEST_F(TranspilerTest, EmitWithExprSingleBindingDirect) {
34 : // Direct construction of a `ResolvedWithExpr`: one binding to an
35 : // INT64 literal, body references the binding. This pins the emit
36 : // shape independently of any analyzer rewrite -- we own the AST,
37 : // so a regression in `EmitWithExpr` itself surfaces here without
38 : // needing the WITH(...) parser feature to be on.
39 1 : std::vector<TestWithExprBinding> bindings;
40 1 : bindings.push_back(
41 1 : {"a", ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(42))});
42 1 : auto with_expr = MakeTestWithExpr(std::move(bindings));
43 1 : ASSERT_NE(with_expr, nullptr);
44 1 : TestTranspiler t;
45 1 : EXPECT_EQ(t.EmitWithExpr(with_expr.get()),
46 1 : "(SELECT \"a\" FROM (SELECT 42 AS \"a\"))");
47 1 : }
48 :
49 1 : TEST_F(TranspilerTest, EmitWithExprMultipleBindingsDirect) {
50 : // Two bindings (`a`, `b`) -> body references the first via
51 : // ColumnRef. We pin the emit shape against analyzer rewrites by
52 : // constructing the AST directly.
53 1 : std::vector<TestWithExprBinding> bindings;
54 1 : bindings.push_back(
55 1 : {"a", ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(1))});
56 1 : bindings.push_back(
57 1 : {"b", ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(2))});
58 1 : auto with_expr = MakeTestWithExpr(std::move(bindings));
59 1 : ASSERT_NE(with_expr, nullptr);
60 1 : TestTranspiler t;
61 1 : EXPECT_EQ(t.EmitWithExpr(with_expr.get()),
62 1 : "(SELECT \"a\" FROM (SELECT 1 AS \"a\", 2 AS \"b\"))");
63 1 : }
64 :
65 1 : TEST_F(TranspilerTest, EmitWithExprFallsBackOnUnloweredBinding) {
66 : // Bindings whose expression cannot lower (here a `ResolvedParameter`
67 : // marked untyped, which falls back per `EmitParameter`) propagate
68 : // the empty-string contract through the WithExpr emit.
69 1 : std::vector<TestWithExprBinding> bindings;
70 1 : bindings.push_back({"a",
71 1 : ::googlesql::MakeResolvedParameter(
72 1 : /*type=*/type_factory_->get_int64(),
73 1 : /*name=*/"x",
74 1 : /*position=*/0,
75 1 : /*is_untyped=*/true)});
76 1 : auto with_expr = MakeTestWithExpr(std::move(bindings));
77 1 : ASSERT_NE(with_expr, nullptr);
78 1 : TestTranspiler t;
79 1 : EXPECT_EQ(t.EmitWithExpr(with_expr.get()), "");
80 1 : }
81 :
82 : // --- FunctionArgument ---------------------------------------------------
83 :
84 1 : TEST_F(TranspilerTest, EmitFunctionArgumentRoutesThroughExpr) {
85 : // `ResolvedFunctionArgument` is the wrapper the analyzer produces
86 : // for `generic_argument_list` slots; today's emit only knows how
87 : // to lower the `expr()` slot. Constructing one directly with a
88 : // small literal lets us assert on the routing without needing a
89 : // builtin function whose AST exposes a generic argument list (the
90 : // BigQuery surface that produces them is mostly TVFs / lambdas,
91 : // which is outside this plan).
92 1 : auto literal =
93 1 : ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(42));
94 1 : auto arg = ::googlesql::MakeResolvedFunctionArgument();
95 1 : arg->set_expr(std::move(literal));
96 1 : TestTranspiler t;
97 1 : EXPECT_EQ(t.EmitFunctionArgument(arg.get()), "42");
98 1 : }
99 :
100 1 : TEST_F(TranspilerTest, EmitFunctionArgumentNonExprSlotFallsBack) {
101 : // A bare `MakeResolvedFunctionArgument()` (every slot null) has no
102 : // expression to route through; the emit must propagate "" so the
103 : // engine surfaces UNIMPLEMENTED for the surrounding function call.
104 : // This is the named-argument-only / TVF / lambda shape the plan
105 : // defers to a follow-up.
106 1 : auto arg = ::googlesql::MakeResolvedFunctionArgument();
107 1 : TestTranspiler t;
108 1 : EXPECT_EQ(t.EmitFunctionArgument(arg.get()), "");
109 1 : }
110 :
111 : // --- JSON field access --------------------------------------------------
112 :
113 : // Helper: synthesize a `ResolvedColumnRef` to a JSON-typed column.
114 : // We construct the AST for `EmitGetJsonField` directly so the emit
115 : // is exercised independently of how the analyzer represents BQ JSON
116 : // dot access (which can be analyzer-folded for fully constant LHS).
117 : // Reusing this helper across the JSON tests keeps each case focused
118 : // on the field-name + nested-access shape.
119 : std::unique_ptr<::googlesql::ResolvedColumnRef> MakeJsonColumnRef(
120 5 : const ::googlesql::Type* json_ty) {
121 5 : ::googlesql::ResolvedColumn col(
122 5 : /*column_id=*/1,
123 5 : /*table_name=*/::googlesql::IdString::MakeGlobal("$test"),
124 5 : /*name=*/::googlesql::IdString::MakeGlobal("data"),
125 5 : json_ty);
126 5 : return ::googlesql::MakeResolvedColumnRef(col, /*is_correlated=*/false);
127 5 : }
128 :
129 1 : TEST_F(TranspilerTest, EmitGetJsonFieldObjectAccess) {
130 : // `data.user` where `data` is JSON resolves to a
131 : // `ResolvedGetJsonField` whose `expr` is the column ref and whose
132 : // `field_name` is `user`. The result type is JSON (BQ keeps the
133 : // type as JSON for `<json>.<field>` access), so the emit uses
134 : // DuckDB's `->` operator -- which also returns JSON.
135 1 : const ::googlesql::Type* json_ty = type_factory_->get_json();
136 1 : auto get = ::googlesql::MakeResolvedGetJsonField(json_ty,
137 1 : MakeJsonColumnRef(json_ty),
138 1 : /*field_name=*/"user");
139 1 : TestTranspiler t;
140 1 : EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" -> 'user')");
141 1 : }
142 :
143 1 : TEST_F(TranspilerTest, EmitGetJsonFieldNestedAccess) {
144 : // `data.user.name` chains two `ResolvedGetJsonField` nodes; the
145 : // outer one's `expr` is the inner one's whole `(<json> -> 'user')`
146 : // emit, so the composition lands as `((data -> 'user') -> 'name')`.
147 : // Each level is a fresh `EmitExpr` call so the emit composes
148 : // recursively without any bespoke flattening.
149 1 : const ::googlesql::Type* json_ty = type_factory_->get_json();
150 1 : auto inner = ::googlesql::MakeResolvedGetJsonField(json_ty,
151 1 : MakeJsonColumnRef(json_ty),
152 1 : /*field_name=*/"user");
153 1 : auto outer = ::googlesql::MakeResolvedGetJsonField(json_ty,
154 1 : std::move(inner),
155 1 : /*field_name=*/"name");
156 1 : TestTranspiler t;
157 1 : EXPECT_EQ(t.EmitGetJsonField(outer.get()),
158 1 : "((\"data\" -> 'user') -> 'name')");
159 1 : }
160 :
161 1 : TEST_F(TranspilerTest, EmitGetJsonFieldEscapesSingleQuotes) {
162 : // BigQuery JSON keys can contain arbitrary characters including
163 : // `'`. The DuckDB-side string literal must double the quote so
164 : // the SQL stays well-formed. We do not need a JSON-path escape
165 : // step because the `->` operator takes a STRING (not a JSON path
166 : // expression) so the only escaping that matters is the SQL
167 : // string-literal one `QuoteString` already provides.
168 1 : const ::googlesql::Type* json_ty = type_factory_->get_json();
169 1 : auto get = ::googlesql::MakeResolvedGetJsonField(json_ty,
170 1 : MakeJsonColumnRef(json_ty),
171 1 : /*field_name=*/"O'Brien");
172 1 : TestTranspiler t;
173 1 : EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" -> 'O''Brien')");
174 1 : }
175 :
176 1 : TEST_F(TranspilerTest, EmitGetJsonFieldHandlesUnicodeFieldName) {
177 : // Unicode-bearing JSON field name. `QuoteString` is a byte-wise
178 : // wrapper so multibyte UTF-8 sequences flow through unchanged --
179 : // we pin the assertion on the same UTF-8 bytes the field name
180 : // carries.
181 1 : const ::googlesql::Type* json_ty = type_factory_->get_json();
182 1 : auto get = ::googlesql::MakeResolvedGetJsonField(json_ty,
183 1 : MakeJsonColumnRef(json_ty),
184 1 : /*field_name=*/"naïve");
185 1 : TestTranspiler t;
186 1 : EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" -> 'naïve')");
187 1 : }
188 :
189 1 : TEST_F(TranspilerTest, EmitGetJsonFieldScalarReturnUsesArrowGreater) {
190 : // When the analyzer types the GetJsonField result as something
191 : // other than JSON (a STRING coerced result, in some BQ analyzer
192 : // configurations), the emit picks DuckDB's `->>` operator so the
193 : // returned column is VARCHAR rather than JSON. This pins the
194 : // type-driven branch in `EmitGetJsonField` for the rare
195 : // scalar-coerced case.
196 1 : const ::googlesql::Type* json_ty = type_factory_->get_json();
197 1 : const ::googlesql::Type* string_ty = type_factory_->get_string();
198 1 : auto get = ::googlesql::MakeResolvedGetJsonField(string_ty,
199 1 : MakeJsonColumnRef(json_ty),
200 1 : /*field_name=*/"name");
201 1 : TestTranspiler t;
202 1 : EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" ->> 'name')");
203 1 : }
204 :
205 1 : TEST_F(TranspilerTest, EmitGetJsonFieldNullExprFallsBack) {
206 : // A malformed `ResolvedGetJsonField` with a null inner expression
207 : // can't be lowered; the emit must propagate "" so the engine
208 : // surfaces UNIMPLEMENTED rather than emitting partial SQL. The
209 : // analyzer doesn't produce this shape, but we guard so a future
210 : // change to the GetJsonField construction surface doesn't silently
211 : // emit `(<empty> -> ...)`.
212 1 : auto get = ::googlesql::MakeResolvedGetJsonField();
213 1 : TestTranspiler t;
214 1 : EXPECT_EQ(t.EmitGetJsonField(get.get()), "");
215 1 : }
216 :
217 : // --- Set operations ----------------------------------------------------
218 :
219 : } // namespace transpiler
220 : } // namespace duckdb
221 : } // namespace engine
222 : } // namespace backend
223 : } // namespace bigquery_emulator
|