LCOV - code coverage report
Current view: top level - backend/engine/duckdb/transpiler - transpiler_emit_struct_json_test.cc (source / functions) Coverage Total Hit
Test: _coverage_report.dat Lines: 94.2 % 121 114
Test Date: 2026-07-02 21:01:18 Functions: 100.0 % 13 13

            Line data    Source code
       1              : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
       2              : 
       3              : namespace bigquery_emulator {
       4              : namespace backend {
       5              : namespace engine {
       6              : namespace duckdb {
       7              : namespace transpiler {
       8              : 
       9            1 : TEST_F(TranspilerTest, EmitWithExprSingleBindingFromAnalyzer) {
      10              :   // `WITH(<assigns>, <body>)` is hard to keep alive against
      11              :   // analyzer constant-folding when both sides are constant, so we
      12              :   // thread a column ref through a function call (`IFNULL(name,
      13              :   // 'x')`) for the binding and reuse the binding column twice in
      14              :   // the body via an outer `IFNULL`. That keeps the binding
      15              :   // necessary -- inlining would evaluate the
      16              :   // `IFNULL(name, 'x')` twice, which would break the semantic
      17              :   // contract the WithExpr exists to preserve.
      18            1 :   const ::googlesql::ResolvedStatement* stmt =
      19            1 :       Analyze("SELECT WITH(a AS IFNULL(name, 'x'), IFNULL(a, a)) FROM people");
      20            1 :   if (stmt == nullptr) {
      21            0 :     GTEST_SKIP() << "analyzer rejected WITH(...) expression -- skip";
      22            0 :   }
      23            1 :   const ::googlesql::ResolvedExpr* expr = QueryFirstSelectExpr(stmt);
      24            1 :   if (expr == nullptr || expr->node_kind() != ::googlesql::RESOLVED_WITH_EXPR) {
      25            1 :     GTEST_SKIP() << "WITH(...) lowered to a non-WithExpr shape -- skip";
      26            1 :   }
      27            0 :   TestTranspiler t;
      28            0 :   EXPECT_EQ(t.EmitWithExpr(expr->GetAs<::googlesql::ResolvedWithExpr>()),
      29            0 :             "(SELECT IFNULL(\"a\", \"a\") FROM (SELECT IFNULL(\"name\", 'x') "
      30            0 :             "AS \"a\"))");
      31            0 : }
      32              : 
      33            1 : TEST_F(TranspilerTest, EmitWithExprSingleBindingDirect) {
      34              :   // Direct construction of a `ResolvedWithExpr`: one binding to an
      35              :   // INT64 literal, body references the binding. This pins the emit
      36              :   // shape independently of any analyzer rewrite -- we own the AST,
      37              :   // so a regression in `EmitWithExpr` itself surfaces here without
      38              :   // needing the WITH(...) parser feature to be on.
      39            1 :   std::vector<TestWithExprBinding> bindings;
      40            1 :   bindings.push_back(
      41            1 :       {"a", ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(42))});
      42            1 :   auto with_expr = MakeTestWithExpr(std::move(bindings));
      43            1 :   ASSERT_NE(with_expr, nullptr);
      44            1 :   TestTranspiler t;
      45            1 :   EXPECT_EQ(t.EmitWithExpr(with_expr.get()),
      46            1 :             "(SELECT \"a\" FROM (SELECT 42 AS \"a\"))");
      47            1 : }
      48              : 
      49            1 : TEST_F(TranspilerTest, EmitWithExprMultipleBindingsDirect) {
      50              :   // Two bindings (`a`, `b`) -> body references the first via
      51              :   // ColumnRef. We pin the emit shape against analyzer rewrites by
      52              :   // constructing the AST directly.
      53            1 :   std::vector<TestWithExprBinding> bindings;
      54            1 :   bindings.push_back(
      55            1 :       {"a", ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(1))});
      56            1 :   bindings.push_back(
      57            1 :       {"b", ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(2))});
      58            1 :   auto with_expr = MakeTestWithExpr(std::move(bindings));
      59            1 :   ASSERT_NE(with_expr, nullptr);
      60            1 :   TestTranspiler t;
      61            1 :   EXPECT_EQ(t.EmitWithExpr(with_expr.get()),
      62            1 :             "(SELECT \"a\" FROM (SELECT 1 AS \"a\", 2 AS \"b\"))");
      63            1 : }
      64              : 
      65            1 : TEST_F(TranspilerTest, EmitWithExprFallsBackOnUnloweredBinding) {
      66              :   // Bindings whose expression cannot lower (here a `ResolvedParameter`
      67              :   // marked untyped, which falls back per `EmitParameter`) propagate
      68              :   // the empty-string contract through the WithExpr emit.
      69            1 :   std::vector<TestWithExprBinding> bindings;
      70            1 :   bindings.push_back({"a",
      71            1 :                       ::googlesql::MakeResolvedParameter(
      72            1 :                           /*type=*/type_factory_->get_int64(),
      73            1 :                           /*name=*/"x",
      74            1 :                           /*position=*/0,
      75            1 :                           /*is_untyped=*/true)});
      76            1 :   auto with_expr = MakeTestWithExpr(std::move(bindings));
      77            1 :   ASSERT_NE(with_expr, nullptr);
      78            1 :   TestTranspiler t;
      79            1 :   EXPECT_EQ(t.EmitWithExpr(with_expr.get()), "");
      80            1 : }
      81              : 
      82              : // --- FunctionArgument ---------------------------------------------------
      83              : 
      84            1 : TEST_F(TranspilerTest, EmitFunctionArgumentRoutesThroughExpr) {
      85              :   // `ResolvedFunctionArgument` is the wrapper the analyzer produces
      86              :   // for `generic_argument_list` slots; today's emit only knows how
      87              :   // to lower the `expr()` slot. Constructing one directly with a
      88              :   // small literal lets us assert on the routing without needing a
      89              :   // builtin function whose AST exposes a generic argument list (the
      90              :   // BigQuery surface that produces them is mostly TVFs / lambdas,
      91              :   // which is outside this plan).
      92            1 :   auto literal =
      93            1 :       ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(42));
      94            1 :   auto arg = ::googlesql::MakeResolvedFunctionArgument();
      95            1 :   arg->set_expr(std::move(literal));
      96            1 :   TestTranspiler t;
      97            1 :   EXPECT_EQ(t.EmitFunctionArgument(arg.get()), "42");
      98            1 : }
      99              : 
     100            1 : TEST_F(TranspilerTest, EmitFunctionArgumentNonExprSlotFallsBack) {
     101              :   // A bare `MakeResolvedFunctionArgument()` (every slot null) has no
     102              :   // expression to route through; the emit must propagate "" so the
     103              :   // engine surfaces UNIMPLEMENTED for the surrounding function call.
     104              :   // This is the named-argument-only / TVF / lambda shape the plan
     105              :   // defers to a follow-up.
     106            1 :   auto arg = ::googlesql::MakeResolvedFunctionArgument();
     107            1 :   TestTranspiler t;
     108            1 :   EXPECT_EQ(t.EmitFunctionArgument(arg.get()), "");
     109            1 : }
     110              : 
     111              : // --- JSON field access --------------------------------------------------
     112              : 
     113              : // Helper: synthesize a `ResolvedColumnRef` to a JSON-typed column.
     114              : // We construct the AST for `EmitGetJsonField` directly so the emit
     115              : // is exercised independently of how the analyzer represents BQ JSON
     116              : // dot access (which can be analyzer-folded for fully constant LHS).
     117              : // Reusing this helper across the JSON tests keeps each case focused
     118              : // on the field-name + nested-access shape.
     119              : std::unique_ptr<::googlesql::ResolvedColumnRef> MakeJsonColumnRef(
     120            5 :     const ::googlesql::Type* json_ty) {
     121            5 :   ::googlesql::ResolvedColumn col(
     122            5 :       /*column_id=*/1,
     123            5 :       /*table_name=*/::googlesql::IdString::MakeGlobal("$test"),
     124            5 :       /*name=*/::googlesql::IdString::MakeGlobal("data"),
     125            5 :       json_ty);
     126            5 :   return ::googlesql::MakeResolvedColumnRef(col, /*is_correlated=*/false);
     127            5 : }
     128              : 
     129            1 : TEST_F(TranspilerTest, EmitGetJsonFieldObjectAccess) {
     130              :   // `data.user` where `data` is JSON resolves to a
     131              :   // `ResolvedGetJsonField` whose `expr` is the column ref and whose
     132              :   // `field_name` is `user`. The result type is JSON (BQ keeps the
     133              :   // type as JSON for `<json>.<field>` access), so the emit uses
     134              :   // DuckDB's `->` operator -- which also returns JSON.
     135            1 :   const ::googlesql::Type* json_ty = type_factory_->get_json();
     136            1 :   auto get = ::googlesql::MakeResolvedGetJsonField(json_ty,
     137            1 :                                                    MakeJsonColumnRef(json_ty),
     138            1 :                                                    /*field_name=*/"user");
     139            1 :   TestTranspiler t;
     140            1 :   EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" -> 'user')");
     141            1 : }
     142              : 
     143            1 : TEST_F(TranspilerTest, EmitGetJsonFieldNestedAccess) {
     144              :   // `data.user.name` chains two `ResolvedGetJsonField` nodes; the
     145              :   // outer one's `expr` is the inner one's whole `(<json> -> 'user')`
     146              :   // emit, so the composition lands as `((data -> 'user') -> 'name')`.
     147              :   // Each level is a fresh `EmitExpr` call so the emit composes
     148              :   // recursively without any bespoke flattening.
     149            1 :   const ::googlesql::Type* json_ty = type_factory_->get_json();
     150            1 :   auto inner = ::googlesql::MakeResolvedGetJsonField(json_ty,
     151            1 :                                                      MakeJsonColumnRef(json_ty),
     152            1 :                                                      /*field_name=*/"user");
     153            1 :   auto outer = ::googlesql::MakeResolvedGetJsonField(json_ty,
     154            1 :                                                      std::move(inner),
     155            1 :                                                      /*field_name=*/"name");
     156            1 :   TestTranspiler t;
     157            1 :   EXPECT_EQ(t.EmitGetJsonField(outer.get()),
     158            1 :             "((\"data\" -> 'user') -> 'name')");
     159            1 : }
     160              : 
     161            1 : TEST_F(TranspilerTest, EmitGetJsonFieldEscapesSingleQuotes) {
     162              :   // BigQuery JSON keys can contain arbitrary characters including
     163              :   // `'`. The DuckDB-side string literal must double the quote so
     164              :   // the SQL stays well-formed. We do not need a JSON-path escape
     165              :   // step because the `->` operator takes a STRING (not a JSON path
     166              :   // expression) so the only escaping that matters is the SQL
     167              :   // string-literal one `QuoteString` already provides.
     168            1 :   const ::googlesql::Type* json_ty = type_factory_->get_json();
     169            1 :   auto get = ::googlesql::MakeResolvedGetJsonField(json_ty,
     170            1 :                                                    MakeJsonColumnRef(json_ty),
     171            1 :                                                    /*field_name=*/"O'Brien");
     172            1 :   TestTranspiler t;
     173            1 :   EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" -> 'O''Brien')");
     174            1 : }
     175              : 
     176            1 : TEST_F(TranspilerTest, EmitGetJsonFieldHandlesUnicodeFieldName) {
     177              :   // Unicode-bearing JSON field name. `QuoteString` is a byte-wise
     178              :   // wrapper so multibyte UTF-8 sequences flow through unchanged --
     179              :   // we pin the assertion on the same UTF-8 bytes the field name
     180              :   // carries.
     181            1 :   const ::googlesql::Type* json_ty = type_factory_->get_json();
     182            1 :   auto get = ::googlesql::MakeResolvedGetJsonField(json_ty,
     183            1 :                                                    MakeJsonColumnRef(json_ty),
     184            1 :                                                    /*field_name=*/"naïve");
     185            1 :   TestTranspiler t;
     186            1 :   EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" -> 'naïve')");
     187            1 : }
     188              : 
     189            1 : TEST_F(TranspilerTest, EmitGetJsonFieldScalarReturnUsesArrowGreater) {
     190              :   // When the analyzer types the GetJsonField result as something
     191              :   // other than JSON (a STRING coerced result, in some BQ analyzer
     192              :   // configurations), the emit picks DuckDB's `->>` operator so the
     193              :   // returned column is VARCHAR rather than JSON. This pins the
     194              :   // type-driven branch in `EmitGetJsonField` for the rare
     195              :   // scalar-coerced case.
     196            1 :   const ::googlesql::Type* json_ty = type_factory_->get_json();
     197            1 :   const ::googlesql::Type* string_ty = type_factory_->get_string();
     198            1 :   auto get = ::googlesql::MakeResolvedGetJsonField(string_ty,
     199            1 :                                                    MakeJsonColumnRef(json_ty),
     200            1 :                                                    /*field_name=*/"name");
     201            1 :   TestTranspiler t;
     202            1 :   EXPECT_EQ(t.EmitGetJsonField(get.get()), "(\"data\" ->> 'name')");
     203            1 : }
     204              : 
     205            1 : TEST_F(TranspilerTest, EmitGetJsonFieldNullExprFallsBack) {
     206              :   // A malformed `ResolvedGetJsonField` with a null inner expression
     207              :   // can't be lowered; the emit must propagate "" so the engine
     208              :   // surfaces UNIMPLEMENTED rather than emitting partial SQL. The
     209              :   // analyzer doesn't produce this shape, but we guard so a future
     210              :   // change to the GetJsonField construction surface doesn't silently
     211              :   // emit `(<empty> -> ...)`.
     212            1 :   auto get = ::googlesql::MakeResolvedGetJsonField();
     213            1 :   TestTranspiler t;
     214            1 :   EXPECT_EQ(t.EmitGetJsonField(get.get()), "");
     215            1 : }
     216              : 
     217              : // --- Set operations ----------------------------------------------------
     218              : 
     219              : }  // namespace transpiler
     220              : }  // namespace duckdb
     221              : }  // namespace engine
     222              : }  // namespace backend
     223              : }  // namespace bigquery_emulator
        

Generated by: LCOV version 2.0-1