LCOV - code coverage report
Current view: top level - backend/engine/semantic/array_struct - array_scan_test.cc (source / functions) Coverage Total Hit
Test: _coverage_report.dat Lines: 96.6 % 175 169
Test Date: 2026-07-02 21:01:18 Functions: 100.0 % 12 12

            Line data    Source code
       1              : // Unit tests for the local `ResolvedArrayScan` evaluator.
       2              : //
       3              : // We drive a real `AnalyzeStatement` against a small
       4              : // `SimpleCatalog` (matching the pattern in
       5              : // `route_classifier_test.cc` / `executor_test.cc`) so the
       6              : // `ResolvedArrayScan*` the evaluator sees is the same shape the
       7              : // engine sees at runtime. The tests cover each shape family the
       8              : // classifier promotes to the semantic executor.
       9              : 
      10              : #include "backend/engine/semantic/array_struct/array_scan.h"
      11              : 
      12              : #include <cstdint>
      13              : #include <memory>
      14              : #include <string>
      15              : #include <vector>
      16              : 
      17              : #include "absl/status/status.h"
      18              : #include "absl/status/statusor.h"
      19              : #include "backend/engine/semantic/error.h"
      20              : #include "backend/engine/semantic/eval_expr.h"
      21              : #include "googlesql/public/analyzer.h"
      22              : #include "googlesql/public/analyzer_options.h"
      23              : #include "googlesql/public/analyzer_output.h"
      24              : #include "googlesql/public/builtin_function_options.h"
      25              : #include "googlesql/public/catalog.h"
      26              : #include "googlesql/public/language_options.h"
      27              : #include "googlesql/public/options.pb.h"
      28              : #include "googlesql/public/simple_catalog.h"
      29              : #include "googlesql/public/types/type_factory.h"
      30              : #include "googlesql/resolved_ast/resolved_ast.h"
      31              : #include "googlesql/resolved_ast/resolved_node_kind.pb.h"
      32              : #include "gtest/gtest.h"
      33              : 
      34              : namespace bigquery_emulator {
      35              : namespace backend {
      36              : namespace engine {
      37              : namespace semantic {
      38              : namespace array_struct {
      39              : namespace {
      40              : 
      41            8 : ::googlesql::AnalyzerOptions MakeAnalyzerOptions() {
      42            8 :   ::googlesql::LanguageOptions language;
      43            8 :   language.EnableMaximumLanguageFeatures();
      44            8 :   language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
      45            8 :   ::googlesql::AnalyzerOptions options(language);
      46            8 :   options.CreateDefaultArenasIfNotSet();
      47            8 :   return options;
      48            8 : }
      49              : 
      50              : class ArrayScanTest : public ::testing::Test {
      51              :  protected:
      52            8 :   void SetUp() override {
      53            8 :     type_factory_ = std::make_unique<::googlesql::TypeFactory>();
      54            8 :     catalog_ = std::make_unique<::googlesql::SimpleCatalog>(
      55            8 :         "exec_catalog", type_factory_.get());
      56            8 :     catalog_->AddBuiltinFunctions(
      57            8 :         ::googlesql::BuiltinFunctionOptions::AllReleasedFunctions());
      58            8 :   }
      59              : 
      60            8 :   const ::googlesql::ResolvedStatement* Analyze(absl::string_view sql) {
      61            8 :     last_output_.reset();
      62            8 :     absl::Status s = ::googlesql::AnalyzeStatement(sql,
      63            8 :                                                    MakeAnalyzerOptions(),
      64            8 :                                                    catalog_.get(),
      65            8 :                                                    type_factory_.get(),
      66            8 :                                                    &last_output_);
      67           16 :     EXPECT_TRUE(s.ok()) << s;
      68            8 :     if (!s.ok() || last_output_ == nullptr) return nullptr;
      69            8 :     return last_output_->resolved_statement();
      70            8 :   }
      71              : 
      72              :   // Walk the analyzed query down to the inner ResolvedArrayScan
      73              :   // (the executor's `ResolvedProjectScan(input_scan=
      74              :   // ResolvedArrayScan(...))` shape).
      75              :   const ::googlesql::ResolvedArrayScan* AnalyzeArrayScan(
      76            8 :       absl::string_view sql) {
      77            8 :     const auto* stmt = Analyze(sql);
      78            8 :     if (stmt == nullptr ||
      79            8 :         stmt->node_kind() != ::googlesql::RESOLVED_QUERY_STMT) {
      80            0 :       return nullptr;
      81            0 :     }
      82            8 :     const auto& q = *stmt->GetAs<::googlesql::ResolvedQueryStmt>();
      83            8 :     const ::googlesql::ResolvedScan* query = q.query();
      84            8 :     if (query == nullptr ||
      85            8 :         query->node_kind() != ::googlesql::RESOLVED_PROJECT_SCAN) {
      86            0 :       return nullptr;
      87            0 :     }
      88            8 :     const auto& p = *query->GetAs<::googlesql::ResolvedProjectScan>();
      89            8 :     if (p.input_scan() == nullptr ||
      90            8 :         p.input_scan()->node_kind() != ::googlesql::RESOLVED_ARRAY_SCAN) {
      91            0 :       return nullptr;
      92            0 :     }
      93            8 :     return p.input_scan()->GetAs<::googlesql::ResolvedArrayScan>();
      94            8 :   }
      95              : 
      96              :   std::unique_ptr<::googlesql::TypeFactory> type_factory_{};
      97              :   std::unique_ptr<::googlesql::SimpleCatalog> catalog_{};
      98              :   std::unique_ptr<const ::googlesql::AnalyzerOutput> last_output_{};
      99              : };
     100              : 
     101            1 : TEST_F(ArrayScanTest, StandaloneUnnestEnumeratesElements) {
     102              :   // Even without WITH OFFSET, the executor walks
     103              :   // `ResolvedArrayScan` for the property-promoted shapes. Pin the
     104              :   // baseline: a 3-element literal array emits 3 rows with the
     105              :   // element column bound row-at-a-time.
     106            1 :   const auto* scan =
     107            1 :       AnalyzeArrayScan("SELECT n FROM UNNEST([10, 20, 30]) AS n");
     108            1 :   ASSERT_NE(scan, nullptr);
     109            1 :   EvalContext ctx;
     110            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     111            2 :   ASSERT_TRUE(rows.ok()) << rows.status();
     112            1 :   ASSERT_EQ(rows->size(), 3u);
     113            1 :   const int col_id = scan->element_column_list(0).column_id();
     114            1 :   EXPECT_EQ((*rows)[0].at(col_id).int64_value(), 10);
     115            1 :   EXPECT_EQ((*rows)[1].at(col_id).int64_value(), 20);
     116            1 :   EXPECT_EQ((*rows)[2].at(col_id).int64_value(), 30);
     117            1 : }
     118              : 
     119            1 : TEST_F(ArrayScanTest, UnnestWithOffsetBindsOffsetColumn) {
     120              :   // `UNNEST(...) WITH OFFSET AS idx` produces one row per element
     121              :   // with two bindings: the element column and the (0-based)
     122              :   // offset column. See `docs/ENGINE_POLICY.md`.
     123            1 :   const auto* scan = AnalyzeArrayScan(
     124            1 :       "SELECT n, idx FROM UNNEST(['a', 'b', 'c']) AS n WITH OFFSET AS idx");
     125            1 :   ASSERT_NE(scan, nullptr);
     126            1 :   ASSERT_NE(scan->array_offset_column(), nullptr);
     127            1 :   EvalContext ctx;
     128            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     129            2 :   ASSERT_TRUE(rows.ok()) << rows.status();
     130            1 :   ASSERT_EQ(rows->size(), 3u);
     131            1 :   const int n_col = scan->element_column_list(0).column_id();
     132            1 :   const int idx_col = scan->array_offset_column()->column().column_id();
     133            1 :   EXPECT_EQ((*rows)[0].at(n_col).string_value(), "a");
     134            1 :   EXPECT_EQ((*rows)[0].at(idx_col).int64_value(), 0);
     135            1 :   EXPECT_EQ((*rows)[1].at(n_col).string_value(), "b");
     136            1 :   EXPECT_EQ((*rows)[1].at(idx_col).int64_value(), 1);
     137            1 :   EXPECT_EQ((*rows)[2].at(n_col).string_value(), "c");
     138            1 :   EXPECT_EQ((*rows)[2].at(idx_col).int64_value(), 2);
     139            1 : }
     140              : 
     141            1 : TEST_F(ArrayScanTest, EmptyArrayInnerProducesZeroRows) {
     142              :   // BigQuery contract: `FROM UNNEST([])` (inner UNNEST on empty
     143              :   // array) emits zero rows. DuckDB does the same on the standalone
     144              :   // shape; the test pins that the semantic executor matches.
     145            1 :   const auto* scan = AnalyzeArrayScan(
     146            1 :       "SELECT n, idx FROM UNNEST(CAST([] AS ARRAY<INT64>)) AS n WITH OFFSET "
     147            1 :       "AS idx");
     148            1 :   ASSERT_NE(scan, nullptr);
     149            1 :   EvalContext ctx;
     150            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     151            2 :   ASSERT_TRUE(rows.ok()) << rows.status();
     152            1 :   EXPECT_EQ(rows->size(), 0u);
     153            1 : }
     154              : 
     155              : // Note on `is_outer` coverage:
     156              : //
     157              : // `is_outer == true` is set by GoogleSQL only when the
     158              : // `ResolvedArrayScan` is the right-hand side of a
     159              : // `LEFT JOIN UNNEST(...)` shape, which inherently requires a
     160              : // non-trivial `input_scan` (correlated). That shape is gated by
     161              : // `docs/ENGINE_POLICY.md`: until the
     162              : // correlated input-scan evaluator lands, `EvaluateArrayScan`
     163              : // surfaces `kNotImplemented`, so a syntactic `is_outer=true`
     164              : // fixture cannot exercise the empty-array NULL-row branch yet.
     165              : //
     166              : // The branch's implementation (`row_count == 0 && scan.is_outer()`
     167              : // -> emit single bindings row with NULL element and NULL offset)
     168              : // is in `array_scan.cc`; Family 4's tests will pin it via a
     169              : // `LEFT JOIN UNNEST(t.arr) WITH OFFSET AS idx` fixture when
     170              : // the correlated path ships.
     171              : 
     172            1 : TEST_F(ArrayScanTest, MultiArrayUnnestPadProducesPaddedRows) {
     173              :   // Family 3: multi-array zip with PAD (the default). The shorter
     174              :   // array pads with NULL element values; the longest determines
     175              :   // the row count.
     176            1 :   const auto* scan =
     177            1 :       AnalyzeArrayScan("SELECT * FROM UNNEST([1, 2, 3], [10, 20])");
     178            1 :   ASSERT_NE(scan, nullptr);
     179            1 :   ASSERT_GT(scan->array_expr_list_size(), 1);
     180            1 :   EvalContext ctx;
     181            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     182            2 :   ASSERT_TRUE(rows.ok()) << rows.status();
     183            1 :   ASSERT_EQ(rows->size(), 3u);
     184            1 :   const int a_col = scan->element_column_list(0).column_id();
     185            1 :   const int b_col = scan->element_column_list(1).column_id();
     186            1 :   EXPECT_EQ((*rows)[0].at(a_col).int64_value(), 1);
     187            1 :   EXPECT_EQ((*rows)[0].at(b_col).int64_value(), 10);
     188            1 :   EXPECT_EQ((*rows)[1].at(a_col).int64_value(), 2);
     189            1 :   EXPECT_EQ((*rows)[1].at(b_col).int64_value(), 20);
     190            1 :   EXPECT_EQ((*rows)[2].at(a_col).int64_value(), 3);
     191            1 :   EXPECT_TRUE((*rows)[2].at(b_col).is_null());
     192            1 : }
     193              : 
     194            1 : TEST_F(ArrayScanTest, MultiArrayUnnestTruncateDropsTail) {
     195              :   // The TRUNCATE mode caps row count at the shortest array.
     196            1 :   const auto* scan = AnalyzeArrayScan(
     197            1 :       "SELECT * FROM UNNEST([1, 2, 3], [10, 20], mode => 'TRUNCATE')");
     198            1 :   ASSERT_NE(scan, nullptr);
     199            1 :   EvalContext ctx;
     200            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     201            2 :   ASSERT_TRUE(rows.ok()) << rows.status();
     202            1 :   EXPECT_EQ(rows->size(), 2u);
     203            1 : }
     204              : 
     205            1 : TEST_F(ArrayScanTest, MultiArrayUnnestStrictRejectsMismatchedLengths) {
     206              :   // STRICT requires equal lengths; mismatched -> structured
     207              :   // INVALID_ARGUMENT.
     208            1 :   const auto* scan = AnalyzeArrayScan(
     209            1 :       "SELECT * FROM UNNEST([1, 2, 3], [10, 20], mode => 'STRICT')");
     210            1 :   ASSERT_NE(scan, nullptr);
     211            1 :   EvalContext ctx;
     212            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     213            1 :   ASSERT_FALSE(rows.ok());
     214            1 :   EXPECT_EQ(rows.status().code(), absl::StatusCode::kInvalidArgument);
     215            1 :   EXPECT_EQ(GetSemanticErrorReason(rows.status()),
     216            1 :             SemanticErrorReason::kInvalidArgument);
     217            1 : }
     218              : 
     219              : TEST_F(ArrayScanTest,
     220            1 :        CorrelatedInputScanWithoutBindingsSurfacesNotImplemented) {
     221              :   // `EvaluateArrayScan` is the inner evaluator; `MaterializeArrayScan`
     222              :   // binds outer rows before calling it. A direct call without
     223              :   // `parent_ctx.columns` must not silently approximate.
     224            1 :   const ::googlesql::Type* int64_array = nullptr;
     225            1 :   ASSERT_TRUE(
     226            1 :       type_factory_->MakeArrayType(type_factory_->get_int64(), &int64_array)
     227            1 :           .ok());
     228            1 :   auto arr_tab = std::make_unique<::googlesql::SimpleTable>(
     229            1 :       "arr_tab",
     230            1 :       std::vector<::googlesql::SimpleTable::NameAndType>{
     231            1 :           {"id", type_factory_->get_int64()},
     232            1 :           {"arr", int64_array},
     233            1 :       });
     234            1 :   catalog_->AddOwnedTable(std::move(arr_tab));
     235            1 :   const auto* scan =
     236            1 :       AnalyzeArrayScan("SELECT id, n FROM arr_tab, UNNEST(arr_tab.arr) AS n");
     237            1 :   ASSERT_NE(scan, nullptr);
     238            1 :   EvalContext ctx;
     239            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     240            1 :   ASSERT_FALSE(rows.ok());
     241            1 :   EXPECT_EQ(rows.status().code(), absl::StatusCode::kUnimplemented);
     242            1 : }
     243              : 
     244            1 : TEST_F(ArrayScanTest, CorrelatedInputScanWithOuterBindingsUnnestsPerRow) {
     245            1 :   const ::googlesql::Type* int64_array = nullptr;
     246            1 :   ASSERT_TRUE(
     247            1 :       type_factory_->MakeArrayType(type_factory_->get_int64(), &int64_array)
     248            1 :           .ok());
     249            1 :   auto arr_tab = std::make_unique<::googlesql::SimpleTable>(
     250            1 :       "arr_tab",
     251            1 :       std::vector<::googlesql::SimpleTable::NameAndType>{
     252            1 :           {"id", type_factory_->get_int64()},
     253            1 :           {"arr", int64_array},
     254            1 :       });
     255            1 :   catalog_->AddOwnedTable(std::move(arr_tab));
     256            1 :   const auto* scan =
     257            1 :       AnalyzeArrayScan("SELECT id, n FROM arr_tab, UNNEST(arr_tab.arr) AS n");
     258            1 :   ASSERT_NE(scan, nullptr);
     259            1 :   ColumnBindings outer;
     260            1 :   outer.emplace(scan->input_scan()->column_list(0).column_id(),
     261            1 :                 ::googlesql::Value::Int64(7));
     262            1 :   outer.emplace(scan->input_scan()->column_list(1).column_id(),
     263            1 :                 ::googlesql::Value::Array(int64_array->AsArray(),
     264            1 :                                           {::googlesql::Value::Int64(10),
     265            1 :                                            ::googlesql::Value::Int64(20)}));
     266            1 :   EvalContext ctx;
     267            1 :   ctx.columns = &outer;
     268            1 :   auto rows = EvaluateArrayScan(*scan, ctx);
     269            2 :   ASSERT_TRUE(rows.ok()) << rows.status();
     270            1 :   ASSERT_EQ(rows->size(), 2u);
     271            1 :   const int n_col = scan->element_column_list(0).column_id();
     272            1 :   EXPECT_EQ((*rows)[0].at(n_col).int64_value(), 10);
     273            1 :   EXPECT_EQ((*rows)[1].at(n_col).int64_value(), 20);
     274            1 : }
     275              : 
     276              : }  // namespace
     277              : }  // namespace array_struct
     278              : }  // namespace semantic
     279              : }  // namespace engine
     280              : }  // namespace backend
     281              : }  // namespace bigquery_emulator
        

Generated by: LCOV version 2.0-1