LCOV - code coverage report
Current view: top level - backend/engine/duckdb/transpiler - transpiler_test_fixture.h (source / functions) Coverage Total Hit
Test: _coverage_report.dat Lines: 95.3 % 190 181
Test Date: 2026-07-02 21:01:18 Functions: 100.0 % 12 12

            Line data    Source code
       1              : #ifndef BIGQUERY_EMULATOR_BACKEND_ENGINE_DUCKDB_TRANSPILER_TRANSPILER_TEST_FIXTURE_H_
       2              : #define BIGQUERY_EMULATOR_BACKEND_ENGINE_DUCKDB_TRANSPILER_TRANSPILER_TEST_FIXTURE_H_
       3              : 
       4              : #include <memory>
       5              : #include <string>
       6              : #include <vector>
       7              : 
       8              : #include "absl/status/status.h"
       9              : #include "absl/status/statusor.h"
      10              : #include "absl/strings/str_cat.h"
      11              : #include "absl/strings/string_view.h"
      12              : #include "backend/engine/disposition.h"
      13              : #include "backend/engine/duckdb/transpiler/functions.h"
      14              : #include "backend/engine/duckdb/transpiler/transpiler.h"
      15              : #include "backend/engine/duckdb/udf/registrar.h"
      16              : #include "duckdb.h"
      17              : #include "googlesql/public/analyzer.h"
      18              : #include "googlesql/public/analyzer_options.h"
      19              : #include "googlesql/public/analyzer_output.h"
      20              : #include "googlesql/public/builtin_function_options.h"
      21              : #include "googlesql/public/catalog.h"
      22              : #include "googlesql/public/id_string.h"
      23              : #include "googlesql/public/language_options.h"
      24              : #include "googlesql/public/options.pb.h"
      25              : #include "googlesql/public/simple_catalog.h"
      26              : #include "googlesql/public/types/type_factory.h"
      27              : #include "googlesql/public/value.h"
      28              : #include "googlesql/resolved_ast/resolved_ast.h"
      29              : #include "googlesql/resolved_ast/resolved_column.h"
      30              : #include "gtest/gtest.h"
      31              : 
      32              : namespace bigquery_emulator {
      33              : namespace backend {
      34              : namespace engine {
      35              : namespace duckdb {
      36              : namespace transpiler {
      37              : 
      38              : // Mirrors `duckdb_engine::MakeAnalyzerOptions` so the tests
      39              : // resolve names through the same `LanguageOptions` snapshot the
      40              : // engine itself uses. Drifting these two breaks function dispatch
      41              : // (e.g. `IFNULL` resolves but `COALESCE` does not) in subtle ways
      42              : // that only surface in the conformance harness.
      43          159 : inline ::googlesql::AnalyzerOptions MakeAnalyzerOptions() {
      44          159 :   ::googlesql::LanguageOptions language;
      45          159 :   language.EnableMaximumLanguageFeatures();
      46          159 :   language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
      47          159 :   language.set_name_resolution_mode(::googlesql::NAME_RESOLUTION_DEFAULT);
      48          159 :   ::googlesql::AnalyzerOptions options(language);
      49          159 :   options.set_error_message_mode(::googlesql::ERROR_MESSAGE_ONE_LINE);
      50              :   // Match the engine: keep PIVOT / UNPIVOT in their raw resolved-AST
      51              :   // forms so the transpiler `EmitPivotScan` / `EmitUnpivotScan`
      52              :   // emit paths are exercised. The engine itself disables these
      53              :   // rewriters (see `local_coordinator_engine.cc::MakeAnalyzerOptions`)
      54              :   // because the disposition table routes the raw nodes through
      55              :   // `duckdb_rewrite`.
      56          159 :   options.disable_rewrite(::googlesql::REWRITE_PIVOT);
      57          159 :   options.disable_rewrite(::googlesql::REWRITE_UNPIVOT);
      58          159 :   options.CreateDefaultArenasIfNotSet();
      59          159 :   return options;
      60          159 : }
      61              : 
      62              : // Helper: synthesize a `ResolvedWithExpr` directly so tests do not
      63              : // depend on the analyzer preserving a `WITH(...)` expression against
      64              : // constant-folding / inlining heuristics.
      65              : struct TestWithExprBinding {
      66              :   std::string name;
      67              :   std::unique_ptr<const ::googlesql::ResolvedExpr> expr;
      68              : };
      69              : 
      70              : inline std::unique_ptr<::googlesql::ResolvedWithExpr> MakeTestWithExpr(
      71            3 :     std::vector<TestWithExprBinding> bindings) {
      72            3 :   if (bindings.empty()) return nullptr;
      73            3 :   std::vector<std::unique_ptr<const ::googlesql::ResolvedComputedColumn>>
      74            3 :       assignments;
      75            3 :   std::vector<::googlesql::ResolvedColumn> columns;
      76            3 :   int next_id = 1;
      77            4 :   for (auto& binding : bindings) {
      78            4 :     if (binding.expr == nullptr) return nullptr;
      79            4 :     const ::googlesql::Type* t = binding.expr->type();
      80            4 :     ::googlesql::ResolvedColumn col(
      81            4 :         next_id++,
      82            4 :         /*table_name=*/::googlesql::IdString::MakeGlobal("$with"),
      83            4 :         /*name=*/::googlesql::IdString::MakeGlobal(binding.name),
      84            4 :         t);
      85            4 :     columns.push_back(col);
      86            4 :     auto cc =
      87            4 :         ::googlesql::MakeResolvedComputedColumn(col, std::move(binding.expr));
      88            4 :     assignments.push_back(std::move(cc));
      89            4 :   }
      90            3 :   std::unique_ptr<const ::googlesql::ResolvedExpr> body =
      91            3 :       ::googlesql::MakeResolvedColumnRef(columns.front(),
      92            3 :                                          /*is_correlated=*/false);
      93            3 :   return ::googlesql::MakeResolvedWithExpr(
      94            3 :       columns.front().type(), std::move(assignments), std::move(body));
      95            3 : }
      96              : 
      97              : // One-stop test fixture. Owns the type factory, catalog, and a
      98              : // people table; every test gets a fresh `Transpiler` so the
      99              : // per-traversal accumulator (when one lands) starts clean.
     100              : class TranspilerTest : public ::testing::Test {
     101              :  protected:
     102          155 :   void SetUp() override {
     103          155 :     type_factory_ = std::make_unique<::googlesql::TypeFactory>();
     104          155 :     catalog_ = std::make_unique<::googlesql::SimpleCatalog>(
     105          155 :         "test_catalog", type_factory_.get());
     106          155 :     ::googlesql::LanguageOptions language;
     107          155 :     language.EnableMaximumLanguageFeatures();
     108          155 :     language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
     109          155 :     ASSERT_TRUE(catalog_
     110          155 :                     ->AddBuiltinFunctionsAndTypes(
     111          155 :                         ::googlesql::BuiltinFunctionOptions(language))
     112          155 :                     .ok());
     113              : 
     114          155 :     auto people = std::make_unique<::googlesql::SimpleTable>(
     115          155 :         "people",
     116          155 :         std::vector<::googlesql::SimpleTable::NameAndType>{
     117          155 :             {"id", type_factory_->get_int64()},
     118          155 :             {"name", type_factory_->get_string()},
     119          155 :         });
     120          155 :     catalog_->AddOwnedTable(std::move(people));
     121              : 
     122          155 :     const ::googlesql::Type* int64_array_type = nullptr;
     123          155 :     EXPECT_TRUE(
     124          155 :         type_factory_
     125          155 :             ->MakeArrayType(type_factory_->get_int64(), &int64_array_type)
     126          155 :             .ok());
     127          155 :     auto arr_table = std::make_unique<::googlesql::SimpleTable>(
     128          155 :         "arr_table",
     129          155 :         std::vector<::googlesql::SimpleTable::NameAndType>{
     130          155 :             {"id", type_factory_->get_int64()},
     131          155 :             {"arr", int64_array_type},
     132          155 :         });
     133          155 :     catalog_->AddOwnedTable(std::move(arr_table));
     134              : 
     135              :     // The join tests need a second table with disjoint column names so
     136              :     // the analyzer doesn't have to disambiguate references in the ON
     137              :     // expression; the transpiler doesn't know how to disambiguate yet
     138              :     // (the per-column emit goes through `ResolvedColumn::name()`).
     139          155 :     auto orders = std::make_unique<::googlesql::SimpleTable>(
     140          155 :         "orders",
     141          155 :         std::vector<::googlesql::SimpleTable::NameAndType>{
     142          155 :             {"order_id", type_factory_->get_int64()},
     143          155 :             {"amount", type_factory_->get_int64()},
     144          155 :         });
     145          155 :     catalog_->AddOwnedTable(std::move(orders));
     146              : 
     147              :     // A table with a string discriminator + numeric value column so the
     148              :     // PIVOT / UNPIVOT tests have something the analyzer accepts for
     149              :     // `FOR <expr> IN (<literals>)` (PIVOT) and
     150              :     // `UNPIVOT(<value_cols> FOR <label_col> IN (<col_groups>))`
     151              :     // (UNPIVOT).
     152          155 :     auto sales = std::make_unique<::googlesql::SimpleTable>(
     153          155 :         "sales",
     154          155 :         std::vector<::googlesql::SimpleTable::NameAndType>{
     155          155 :             {"region", type_factory_->get_string()},
     156          155 :             {"kind", type_factory_->get_string()},
     157          155 :             {"amount", type_factory_->get_int64()},
     158          155 :         });
     159          155 :     catalog_->AddOwnedTable(std::move(sales));
     160              : 
     161              :     // Wide table for UNPIVOT: each column is one of the unpivot
     162              :     // arguments the analyzer threads through `unpivot_arg_list`.
     163          155 :     auto wide = std::make_unique<::googlesql::SimpleTable>(
     164          155 :         "wide",
     165          155 :         std::vector<::googlesql::SimpleTable::NameAndType>{
     166          155 :             {"region", type_factory_->get_string()},
     167          155 :             {"q1", type_factory_->get_int64()},
     168          155 :             {"q2", type_factory_->get_int64()},
     169          155 :         });
     170          155 :     catalog_->AddOwnedTable(std::move(wide));
     171              : 
     172          155 :     auto org = std::make_unique<::googlesql::SimpleTable>(
     173          155 :         "org",
     174          155 :         std::vector<::googlesql::SimpleTable::NameAndType>{
     175          155 :             {"employee", type_factory_->get_string()},
     176          155 :             {"manager", type_factory_->get_string()},
     177          155 :         });
     178          155 :     catalog_->AddOwnedTable(std::move(org));
     179              : 
     180          155 :     auto transactions = std::make_unique<::googlesql::SimpleTable>(
     181          155 :         "transactions",
     182          155 :         std::vector<::googlesql::SimpleTable::NameAndType>{
     183          155 :             {"timestamp", type_factory_->get_timestamp()},
     184          155 :             {"origin", type_factory_->get_string()},
     185          155 :             {"destination", type_factory_->get_string()},
     186          155 :             {"amount", type_factory_->get_numeric()},
     187          155 :         });
     188          155 :     catalog_->AddOwnedTable(std::move(transactions));
     189              : 
     190          155 :     transpiler_ = std::make_unique<Transpiler>();
     191          155 :   }
     192              : 
     193              :   // Analyze `sql` against the fixture catalog and return the
     194              :   // resolved AST. The `AnalyzerOutput` lives in `last_output_` so
     195              :   // the `ResolvedStatement` (and the `Type*` / `Function*` pointers
     196              :   // it references) stays alive for the duration of the test.
     197          153 :   const ::googlesql::ResolvedStatement* Analyze(absl::string_view sql) {
     198          153 :     ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
     199          153 :     return AnalyzeWith(sql, options);
     200          153 :   }
     201              : 
     202              :   // Analyze `sql` with `options` already configured -- handy for the
     203              :   // parameter-emit tests that need `AddQueryParameter` /
     204              :   // `AddPositionalQueryParameter` calls before analysis. Same
     205              :   // ownership contract as `Analyze`: the resolved AST lives in
     206              :   // `last_output_` for the duration of the test.
     207              :   const ::googlesql::ResolvedStatement* AnalyzeWith(
     208          159 :       absl::string_view sql, const ::googlesql::AnalyzerOptions& options) {
     209          159 :     last_output_.reset();
     210          159 :     absl::Status s = ::googlesql::AnalyzeStatement(
     211          159 :         sql, options, catalog_.get(), type_factory_.get(), &last_output_);
     212          318 :     EXPECT_TRUE(s.ok()) << s;
     213          159 :     if (!s.ok() || last_output_ == nullptr) return nullptr;
     214          159 :     return last_output_->resolved_statement();
     215          159 :   }
     216              : 
     217              :   // Convenience: pluck the inner `ResolvedScan` out of a
     218              :   // `SELECT ... FROM ...` statement. We unwrap the ResolvedQueryStmt
     219              :   // (and the ResolvedProjectScan the analyzer wraps around any
     220              :   // explicit SELECT list) so the per-shape `Emit*` assertion below
     221              :   // sees the exact subtree it covers.
     222              :   const ::googlesql::ResolvedScan* QueryInputScan(
     223           51 :       const ::googlesql::ResolvedStatement* stmt) {
     224           51 :     EXPECT_NE(stmt, nullptr);
     225           51 :     if (stmt == nullptr) return nullptr;
     226           51 :     const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
     227           51 :     EXPECT_NE(q, nullptr);
     228           51 :     if (q == nullptr) return nullptr;
     229           51 :     const ::googlesql::ResolvedScan* scan = q->query();
     230           88 :     while (scan != nullptr &&
     231           88 :            scan->node_kind() == ::googlesql::RESOLVED_PROJECT_SCAN) {
     232           37 :       scan = scan->GetAs<::googlesql::ResolvedProjectScan>()->input_scan();
     233           37 :     }
     234           51 :     return scan;
     235           51 :   }
     236              : 
     237              :   // Walk down to the first ResolvedExpr we can find inside a SELECT
     238              :   // list -- handy for testing literal / function / column-ref emit
     239              :   // without having to also implement EmitProjectScan.
     240              :   const ::googlesql::ResolvedExpr* QueryFirstSelectExpr(
     241           19 :       const ::googlesql::ResolvedStatement* stmt) {
     242           19 :     EXPECT_NE(stmt, nullptr);
     243           19 :     if (stmt == nullptr) return nullptr;
     244           19 :     const auto* q = stmt->GetAs<::googlesql::ResolvedQueryStmt>();
     245           19 :     if (q == nullptr || q->query() == nullptr) return nullptr;
     246           19 :     const ::googlesql::ResolvedScan* scan = q->query();
     247           19 :     if (scan->node_kind() != ::googlesql::RESOLVED_PROJECT_SCAN) return nullptr;
     248           19 :     const auto* project = scan->GetAs<::googlesql::ResolvedProjectScan>();
     249           19 :     if (project->expr_list_size() == 0) return nullptr;
     250           19 :     return project->expr_list(0)->expr();
     251           19 :   }
     252              : 
     253              :   std::unique_ptr<::googlesql::TypeFactory> type_factory_{};
     254              :   std::unique_ptr<::googlesql::SimpleCatalog> catalog_{};
     255              :   std::unique_ptr<const ::googlesql::AnalyzerOutput> last_output_{};
     256              :   std::unique_ptr<Transpiler> transpiler_{};
     257              : };
     258              : 
     259              : // Subclass that publishes the protected `Emit*` family so the tests
     260              : // can assert on individual emits without having to drive a full
     261              : // query through `Transpile`. The class doesn't override anything --
     262              : // it just widens the visibility.
     263              : class TestTranspiler : public Transpiler {
     264              :  public:
     265              :   using Transpiler::EmitAggregateScan;
     266              :   using Transpiler::EmitAnalyticScan;
     267              :   using Transpiler::EmitArrayScan;
     268              :   using Transpiler::EmitCast;
     269              :   using Transpiler::EmitColumnRef;
     270              :   using Transpiler::EmitComputedColumn;
     271              :   using Transpiler::EmitFilterScan;
     272              :   using Transpiler::EmitFunctionArgument;
     273              :   using Transpiler::EmitFunctionCall;
     274              :   using Transpiler::EmitGetJsonField;
     275              :   using Transpiler::EmitGetStructField;
     276              :   using Transpiler::EmitJoinScan;
     277              :   using Transpiler::EmitLimitOffsetScan;
     278              :   using Transpiler::EmitLiteral;
     279              :   using Transpiler::EmitMakeStruct;
     280              :   using Transpiler::EmitOrderByScan;
     281              :   using Transpiler::EmitOutputColumn;
     282              :   using Transpiler::EmitParameter;
     283              :   using Transpiler::EmitPivotScan;
     284              :   using Transpiler::EmitProjectScan;
     285              :   using Transpiler::EmitQueryStmt;
     286              :   using Transpiler::EmitRecursiveRefScan;
     287              :   using Transpiler::EmitRecursiveScan;
     288              :   using Transpiler::EmitSampleScan;
     289              :   using Transpiler::EmitSetOperationScan;
     290              :   using Transpiler::EmitSingleRowScan;
     291              :   using Transpiler::EmitSubqueryExpr;
     292              :   using Transpiler::EmitTableScan;
     293              :   using Transpiler::EmitUnpivotScan;
     294              :   using Transpiler::EmitWithExpr;
     295              :   using Transpiler::EmitWithRefScan;
     296              :   using Transpiler::EmitWithScan;
     297              : };
     298              : 
     299              : // DuckDB-backed binding checker for composition / property tests. Opens an
     300              : // in-memory connection, registers polyfill UDFs, and asserts transpiled SQL
     301              : // binds (via duckdb_query, which runs parse + bind + plan).
     302              : class TranspilerBindFixture : public TranspilerTest {
     303              :  protected:
     304           10 :   void SetUp() override {
     305           10 :     TranspilerTest::SetUp();
     306           10 :     ASSERT_EQ(::duckdb_open(nullptr, &db_), ::DuckDBSuccess);
     307           10 :     ASSERT_EQ(::duckdb_connect(db_, &conn_), ::DuckDBSuccess);
     308           10 :     absl::Status reg = udf::RegisterAll(conn_);
     309           20 :     ASSERT_TRUE(reg.ok()) << reg;
     310           10 :   }
     311              : 
     312           10 :   void TearDown() override {
     313           10 :     if (conn_ != nullptr) ::duckdb_disconnect(&conn_);
     314           10 :     if (db_ != nullptr) ::duckdb_close(&db_);
     315           10 :     conn_ = nullptr;
     316           10 :     db_ = nullptr;
     317           10 :     TranspilerTest::TearDown();
     318           10 :   }
     319              : 
     320           60 :   void ExecDdl(absl::string_view sql) {
     321           60 :     ::duckdb_result result;
     322          120 :     ASSERT_EQ(::duckdb_query(conn_, std::string(sql).c_str(), &result),
     323          120 :               ::DuckDBSuccess)
     324          120 :         << ::duckdb_result_error(&result);
     325           60 :     ::duckdb_destroy_result(&result);
     326           60 :   }
     327              : 
     328              :   void AssertTranspileBinds(const ::googlesql::ResolvedStatement* stmt,
     329              :                             absl::string_view source_sql,
     330           44 :                             TestTranspiler* t) {
     331           88 :     ASSERT_NE(stmt, nullptr) << "analyze failed for:\n" << source_sql;
     332           44 :     std::string emitted = t->Transpile(stmt);
     333           88 :     ASSERT_FALSE(emitted.empty()) << "transpiler returned empty SQL for:\n"
     334           88 :                                   << source_sql;
     335           44 :     SCOPED_TRACE(emitted);
     336           44 :     ::duckdb_result result{};
     337           44 :     const auto rc = ::duckdb_query(conn_, emitted.c_str(), &result);
     338           44 :     if (rc != ::DuckDBSuccess) {
     339            0 :       const char* err = ::duckdb_result_error(&result);
     340            0 :       FAIL() << "DuckDB rejected transpiled SQL\n"
     341            0 :              << "source_sql:\n"
     342            0 :              << source_sql << "\n"
     343            0 :              << "emitted_sql:\n"
     344            0 :              << emitted << "\n"
     345            0 :              << "duckdb_error:\n"
     346            0 :              << (err == nullptr ? "(null)" : err);
     347            0 :     }
     348           44 :     ::duckdb_destroy_result(&result);
     349           44 :   }
     350              : 
     351           44 :   void AssertSqlTranspileBinds(absl::string_view sql) {
     352           44 :     const ::googlesql::ResolvedStatement* stmt = Analyze(sql);
     353           44 :     TestTranspiler t;
     354           44 :     AssertTranspileBinds(stmt, sql, &t);
     355           44 :   }
     356              : 
     357              :   ::duckdb_database db_ = nullptr;
     358              :   ::duckdb_connection conn_ = nullptr;
     359              : };
     360              : 
     361              : }  // namespace transpiler
     362              : }  // namespace duckdb
     363              : }  // namespace engine
     364              : }  // namespace backend
     365              : }  // namespace bigquery_emulator
     366              : 
     367              : #endif  // BIGQUERY_EMULATOR_BACKEND_ENGINE_DUCKDB_TRANSPILER_TRANSPILER_TEST_FIXTURE_H_
        

Generated by: LCOV version 2.0-1