Line data Source code
1 : #ifndef BIGQUERY_EMULATOR_BACKEND_ENGINE_DUCKDB_TRANSPILER_TRANSPILER_TEST_FIXTURE_H_
2 : #define BIGQUERY_EMULATOR_BACKEND_ENGINE_DUCKDB_TRANSPILER_TRANSPILER_TEST_FIXTURE_H_
3 :
4 : #include <memory>
5 : #include <string>
6 : #include <vector>
7 :
8 : #include "absl/status/status.h"
9 : #include "absl/status/statusor.h"
10 : #include "absl/strings/str_cat.h"
11 : #include "absl/strings/string_view.h"
12 : #include "backend/engine/disposition.h"
13 : #include "backend/engine/duckdb/transpiler/functions.h"
14 : #include "backend/engine/duckdb/transpiler/transpiler.h"
15 : #include "backend/engine/duckdb/udf/registrar.h"
16 : #include "duckdb.h"
17 : #include "googlesql/public/analyzer.h"
18 : #include "googlesql/public/analyzer_options.h"
19 : #include "googlesql/public/analyzer_output.h"
20 : #include "googlesql/public/builtin_function_options.h"
21 : #include "googlesql/public/catalog.h"
22 : #include "googlesql/public/id_string.h"
23 : #include "googlesql/public/language_options.h"
24 : #include "googlesql/public/options.pb.h"
25 : #include "googlesql/public/simple_catalog.h"
26 : #include "googlesql/public/types/type_factory.h"
27 : #include "googlesql/public/value.h"
28 : #include "googlesql/resolved_ast/resolved_ast.h"
29 : #include "googlesql/resolved_ast/resolved_column.h"
30 : #include "gtest/gtest.h"
31 :
32 : namespace bigquery_emulator {
33 : namespace backend {
34 : namespace engine {
35 : namespace duckdb {
36 : namespace transpiler {
37 :
38 : // Mirrors `duckdb_engine::MakeAnalyzerOptions` so the tests
39 : // resolve names through the same `LanguageOptions` snapshot the
40 : // engine itself uses. Drifting these two breaks function dispatch
41 : // (e.g. `IFNULL` resolves but `COALESCE` does not) in subtle ways
42 : // that only surface in the conformance harness.
43 159 : inline ::googlesql::AnalyzerOptions MakeAnalyzerOptions() {
44 159 : ::googlesql::LanguageOptions language;
45 159 : language.EnableMaximumLanguageFeatures();
46 159 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
47 159 : language.set_name_resolution_mode(::googlesql::NAME_RESOLUTION_DEFAULT);
48 159 : ::googlesql::AnalyzerOptions options(language);
49 159 : options.set_error_message_mode(::googlesql::ERROR_MESSAGE_ONE_LINE);
50 : // Match the engine: keep PIVOT / UNPIVOT in their raw resolved-AST
51 : // forms so the transpiler `EmitPivotScan` / `EmitUnpivotScan`
52 : // emit paths are exercised. The engine itself disables these
53 : // rewriters (see `local_coordinator_engine.cc::MakeAnalyzerOptions`)
54 : // because the disposition table routes the raw nodes through
55 : // `duckdb_rewrite`.
56 159 : options.disable_rewrite(::googlesql::REWRITE_PIVOT);
57 159 : options.disable_rewrite(::googlesql::REWRITE_UNPIVOT);
58 159 : options.CreateDefaultArenasIfNotSet();
59 159 : return options;
60 159 : }
61 :
62 : // Helper: synthesize a `ResolvedWithExpr` directly so tests do not
63 : // depend on the analyzer preserving a `WITH(...)` expression against
64 : // constant-folding / inlining heuristics.
65 : struct TestWithExprBinding {
66 : std::string name;
67 : std::unique_ptr<const ::googlesql::ResolvedExpr> expr;
68 : };
69 :
70 : inline std::unique_ptr<::googlesql::ResolvedWithExpr> MakeTestWithExpr(
71 3 : std::vector<TestWithExprBinding> bindings) {
72 3 : if (bindings.empty()) return nullptr;
73 3 : std::vector<std::unique_ptr<const ::googlesql::ResolvedComputedColumn>>
74 3 : assignments;
75 3 : std::vector<::googlesql::ResolvedColumn> columns;
76 3 : int next_id = 1;
77 4 : for (auto& binding : bindings) {
78 4 : if (binding.expr == nullptr) return nullptr;
79 4 : const ::googlesql::Type* t = binding.expr->type();
80 4 : ::googlesql::ResolvedColumn col(
81 4 : next_id++,
82 4 : /*table_name=*/::googlesql::IdString::MakeGlobal("$with"),
83 4 : /*name=*/::googlesql::IdString::MakeGlobal(binding.name),
84 4 : t);
85 4 : columns.push_back(col);
86 4 : auto cc =
87 4 : ::googlesql::MakeResolvedComputedColumn(col, std::move(binding.expr));
88 4 : assignments.push_back(std::move(cc));
89 4 : }
90 3 : std::unique_ptr<const ::googlesql::ResolvedExpr> body =
91 3 : ::googlesql::MakeResolvedColumnRef(columns.front(),
92 3 : /*is_correlated=*/false);
93 3 : return ::googlesql::MakeResolvedWithExpr(
94 3 : columns.front().type(), std::move(assignments), std::move(body));
95 3 : }
96 :
97 : // One-stop test fixture. Owns the type factory, catalog, and a
98 : // people table; every test gets a fresh `Transpiler` so the
99 : // per-traversal accumulator (when one lands) starts clean.
100 : class TranspilerTest : public ::testing::Test {
101 : protected:
102 155 : void SetUp() override {
103 155 : type_factory_ = std::make_unique<::googlesql::TypeFactory>();
104 155 : catalog_ = std::make_unique<::googlesql::SimpleCatalog>(
105 155 : "test_catalog", type_factory_.get());
106 155 : ::googlesql::LanguageOptions language;
107 155 : language.EnableMaximumLanguageFeatures();
108 155 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
109 155 : ASSERT_TRUE(catalog_
110 155 : ->AddBuiltinFunctionsAndTypes(
111 155 : ::googlesql::BuiltinFunctionOptions(language))
112 155 : .ok());
113 :
114 155 : auto people = std::make_unique<::googlesql::SimpleTable>(
115 155 : "people",
116 155 : std::vector<::googlesql::SimpleTable::NameAndType>{
117 155 : {"id", type_factory_->get_int64()},
118 155 : {"name", type_factory_->get_string()},
119 155 : });
120 155 : catalog_->AddOwnedTable(std::move(people));
121 :
122 155 : const ::googlesql::Type* int64_array_type = nullptr;
123 155 : EXPECT_TRUE(
124 155 : type_factory_
125 155 : ->MakeArrayType(type_factory_->get_int64(), &int64_array_type)
126 155 : .ok());
127 155 : auto arr_table = std::make_unique<::googlesql::SimpleTable>(
128 155 : "arr_table",
129 155 : std::vector<::googlesql::SimpleTable::NameAndType>{
130 155 : {"id", type_factory_->get_int64()},
131 155 : {"arr", int64_array_type},
132 155 : });
133 155 : catalog_->AddOwnedTable(std::move(arr_table));
134 :
135 : // The join tests need a second table with disjoint column names so
136 : // the analyzer doesn't have to disambiguate references in the ON
137 : // expression; the transpiler doesn't know how to disambiguate yet
138 : // (the per-column emit goes through `ResolvedColumn::name()`).
139 155 : auto orders = std::make_unique<::googlesql::SimpleTable>(
140 155 : "orders",
141 155 : std::vector<::googlesql::SimpleTable::NameAndType>{
142 155 : {"order_id", type_factory_->get_int64()},
143 155 : {"amount", type_factory_->get_int64()},
144 155 : });
145 155 : catalog_->AddOwnedTable(std::move(orders));
146 :
147 : // A table with a string discriminator + numeric value column so the
148 : // PIVOT / UNPIVOT tests have something the analyzer accepts for
149 : // `FOR <expr> IN (<literals>)` (PIVOT) and
150 : // `UNPIVOT(<value_cols> FOR <label_col> IN (<col_groups>))`
151 : // (UNPIVOT).
152 155 : auto sales = std::make_unique<::googlesql::SimpleTable>(
153 155 : "sales",
154 155 : std::vector<::googlesql::SimpleTable::NameAndType>{
155 155 : {"region", type_factory_->get_string()},
156 155 : {"kind", type_factory_->get_string()},
157 155 : {"amount", type_factory_->get_int64()},
158 155 : });
159 155 : catalog_->AddOwnedTable(std::move(sales));
160 :
161 : // Wide table for UNPIVOT: each column is one of the unpivot
162 : // arguments the analyzer threads through `unpivot_arg_list`.
163 155 : auto wide = std::make_unique<::googlesql::SimpleTable>(
164 155 : "wide",
165 155 : std::vector<::googlesql::SimpleTable::NameAndType>{
166 155 : {"region", type_factory_->get_string()},
167 155 : {"q1", type_factory_->get_int64()},
168 155 : {"q2", type_factory_->get_int64()},
169 155 : });
170 155 : catalog_->AddOwnedTable(std::move(wide));
171 :
172 155 : auto org = std::make_unique<::googlesql::SimpleTable>(
173 155 : "org",
174 155 : std::vector<::googlesql::SimpleTable::NameAndType>{
175 155 : {"employee", type_factory_->get_string()},
176 155 : {"manager", type_factory_->get_string()},
177 155 : });
178 155 : catalog_->AddOwnedTable(std::move(org));
179 :
180 155 : auto transactions = std::make_unique<::googlesql::SimpleTable>(
181 155 : "transactions",
182 155 : std::vector<::googlesql::SimpleTable::NameAndType>{
183 155 : {"timestamp", type_factory_->get_timestamp()},
184 155 : {"origin", type_factory_->get_string()},
185 155 : {"destination", type_factory_->get_string()},
186 155 : {"amount", type_factory_->get_numeric()},
187 155 : });
188 155 : catalog_->AddOwnedTable(std::move(transactions));
189 :
190 155 : transpiler_ = std::make_unique<Transpiler>();
191 155 : }
192 :
193 : // Analyze `sql` against the fixture catalog and return the
194 : // resolved AST. The `AnalyzerOutput` lives in `last_output_` so
195 : // the `ResolvedStatement` (and the `Type*` / `Function*` pointers
196 : // it references) stays alive for the duration of the test.
197 153 : const ::googlesql::ResolvedStatement* Analyze(absl::string_view sql) {
198 153 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
199 153 : return AnalyzeWith(sql, options);
200 153 : }
201 :
202 : // Analyze `sql` with `options` already configured -- handy for the
203 : // parameter-emit tests that need `AddQueryParameter` /
204 : // `AddPositionalQueryParameter` calls before analysis. Same
205 : // ownership contract as `Analyze`: the resolved AST lives in
206 : // `last_output_` for the duration of the test.
207 : const ::googlesql::ResolvedStatement* AnalyzeWith(
208 159 : absl::string_view sql, const ::googlesql::AnalyzerOptions& options) {
209 159 : last_output_.reset();
210 159 : absl::Status s = ::googlesql::AnalyzeStatement(
211 159 : sql, options, catalog_.get(), type_factory_.get(), &last_output_);
212 318 : EXPECT_TRUE(s.ok()) << s;
213 159 : if (!s.ok() || last_output_ == nullptr) return nullptr;
214 159 : return last_output_->resolved_statement();
215 159 : }
216 :
217 : // Convenience: pluck the inner `ResolvedScan` out of a
218 : // `SELECT ... FROM ...` statement. We unwrap the ResolvedQueryStmt
219 : // (and the ResolvedProjectScan the analyzer wraps around any
220 : // explicit SELECT list) so the per-shape `Emit*` assertion below
221 : // sees the exact subtree it covers.
222 : const ::googlesql::ResolvedScan* QueryInputScan(
223 51 : const ::googlesql::ResolvedStatement* stmt) {
224 51 : EXPECT_NE(stmt, nullptr);
225 51 : if (stmt == nullptr) return nullptr;
226 51 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
227 51 : EXPECT_NE(q, nullptr);
228 51 : if (q == nullptr) return nullptr;
229 51 : const ::googlesql::ResolvedScan* scan = q->query();
230 88 : while (scan != nullptr &&
231 88 : scan->node_kind() == ::googlesql::RESOLVED_PROJECT_SCAN) {
232 37 : scan = scan->GetAs<::googlesql::ResolvedProjectScan>()->input_scan();
233 37 : }
234 51 : return scan;
235 51 : }
236 :
237 : // Walk down to the first ResolvedExpr we can find inside a SELECT
238 : // list -- handy for testing literal / function / column-ref emit
239 : // without having to also implement EmitProjectScan.
240 : const ::googlesql::ResolvedExpr* QueryFirstSelectExpr(
241 19 : const ::googlesql::ResolvedStatement* stmt) {
242 19 : EXPECT_NE(stmt, nullptr);
243 19 : if (stmt == nullptr) return nullptr;
244 19 : const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
245 19 : if (q == nullptr || q->query() == nullptr) return nullptr;
246 19 : const ::googlesql::ResolvedScan* scan = q->query();
247 19 : if (scan->node_kind() != ::googlesql::RESOLVED_PROJECT_SCAN) return nullptr;
248 19 : const auto* project = scan->GetAs<::googlesql::ResolvedProjectScan>();
249 19 : if (project->expr_list_size() == 0) return nullptr;
250 19 : return project->expr_list(0)->expr();
251 19 : }
252 :
253 : std::unique_ptr<::googlesql::TypeFactory> type_factory_{};
254 : std::unique_ptr<::googlesql::SimpleCatalog> catalog_{};
255 : std::unique_ptr<const ::googlesql::AnalyzerOutput> last_output_{};
256 : std::unique_ptr<Transpiler> transpiler_{};
257 : };
258 :
259 : // Subclass that publishes the protected `Emit*` family so the tests
260 : // can assert on individual emits without having to drive a full
261 : // query through `Transpile`. The class doesn't override anything --
262 : // it just widens the visibility.
263 : class TestTranspiler : public Transpiler {
264 : public:
265 : using Transpiler::EmitAggregateScan;
266 : using Transpiler::EmitAnalyticScan;
267 : using Transpiler::EmitArrayScan;
268 : using Transpiler::EmitCast;
269 : using Transpiler::EmitColumnRef;
270 : using Transpiler::EmitComputedColumn;
271 : using Transpiler::EmitFilterScan;
272 : using Transpiler::EmitFunctionArgument;
273 : using Transpiler::EmitFunctionCall;
274 : using Transpiler::EmitGetJsonField;
275 : using Transpiler::EmitGetStructField;
276 : using Transpiler::EmitJoinScan;
277 : using Transpiler::EmitLimitOffsetScan;
278 : using Transpiler::EmitLiteral;
279 : using Transpiler::EmitMakeStruct;
280 : using Transpiler::EmitOrderByScan;
281 : using Transpiler::EmitOutputColumn;
282 : using Transpiler::EmitParameter;
283 : using Transpiler::EmitPivotScan;
284 : using Transpiler::EmitProjectScan;
285 : using Transpiler::EmitQueryStmt;
286 : using Transpiler::EmitRecursiveRefScan;
287 : using Transpiler::EmitRecursiveScan;
288 : using Transpiler::EmitSampleScan;
289 : using Transpiler::EmitSetOperationScan;
290 : using Transpiler::EmitSingleRowScan;
291 : using Transpiler::EmitSubqueryExpr;
292 : using Transpiler::EmitTableScan;
293 : using Transpiler::EmitUnpivotScan;
294 : using Transpiler::EmitWithExpr;
295 : using Transpiler::EmitWithRefScan;
296 : using Transpiler::EmitWithScan;
297 : };
298 :
299 : // DuckDB-backed binding checker for composition / property tests. Opens an
300 : // in-memory connection, registers polyfill UDFs, and asserts transpiled SQL
301 : // binds (via duckdb_query, which runs parse + bind + plan).
302 : class TranspilerBindFixture : public TranspilerTest {
303 : protected:
304 10 : void SetUp() override {
305 10 : TranspilerTest::SetUp();
306 10 : ASSERT_EQ(::duckdb_open(nullptr, &db_), ::DuckDBSuccess);
307 10 : ASSERT_EQ(::duckdb_connect(db_, &conn_), ::DuckDBSuccess);
308 10 : absl::Status reg = udf::RegisterAll(conn_);
309 20 : ASSERT_TRUE(reg.ok()) << reg;
310 10 : }
311 :
312 10 : void TearDown() override {
313 10 : if (conn_ != nullptr) ::duckdb_disconnect(&conn_);
314 10 : if (db_ != nullptr) ::duckdb_close(&db_);
315 10 : conn_ = nullptr;
316 10 : db_ = nullptr;
317 10 : TranspilerTest::TearDown();
318 10 : }
319 :
320 60 : void ExecDdl(absl::string_view sql) {
321 60 : ::duckdb_result result;
322 120 : ASSERT_EQ(::duckdb_query(conn_, std::string(sql).c_str(), &result),
323 120 : ::DuckDBSuccess)
324 120 : << ::duckdb_result_error(&result);
325 60 : ::duckdb_destroy_result(&result);
326 60 : }
327 :
328 : void AssertTranspileBinds(const ::googlesql::ResolvedStatement* stmt,
329 : absl::string_view source_sql,
330 44 : TestTranspiler* t) {
331 88 : ASSERT_NE(stmt, nullptr) << "analyze failed for:\n" << source_sql;
332 44 : std::string emitted = t->Transpile(stmt);
333 88 : ASSERT_FALSE(emitted.empty()) << "transpiler returned empty SQL for:\n"
334 88 : << source_sql;
335 44 : SCOPED_TRACE(emitted);
336 44 : ::duckdb_result result{};
337 44 : const auto rc = ::duckdb_query(conn_, emitted.c_str(), &result);
338 44 : if (rc != ::DuckDBSuccess) {
339 0 : const char* err = ::duckdb_result_error(&result);
340 0 : FAIL() << "DuckDB rejected transpiled SQL\n"
341 0 : << "source_sql:\n"
342 0 : << source_sql << "\n"
343 0 : << "emitted_sql:\n"
344 0 : << emitted << "\n"
345 0 : << "duckdb_error:\n"
346 0 : << (err == nullptr ? "(null)" : err);
347 0 : }
348 44 : ::duckdb_destroy_result(&result);
349 44 : }
350 :
351 44 : void AssertSqlTranspileBinds(absl::string_view sql) {
352 44 : const ::googlesql::ResolvedStatement* stmt = Analyze(sql);
353 44 : TestTranspiler t;
354 44 : AssertTranspileBinds(stmt, sql, &t);
355 44 : }
356 :
357 : ::duckdb_database db_ = nullptr;
358 : ::duckdb_connection conn_ = nullptr;
359 : };
360 :
361 : } // namespace transpiler
362 : } // namespace duckdb
363 : } // namespace engine
364 : } // namespace backend
365 : } // namespace bigquery_emulator
366 :
367 : #endif // BIGQUERY_EMULATOR_BACKEND_ENGINE_DUCKDB_TRANSPILER_TRANSPILER_TEST_FIXTURE_H_
|