Line data Source code
1 : // Unit tests for the local `ResolvedArrayScan` evaluator.
2 : //
3 : // We drive a real `AnalyzeStatement` against a small
4 : // `SimpleCatalog` (matching the pattern in
5 : // `route_classifier_test.cc` / `executor_test.cc`) so the
6 : // `ResolvedArrayScan*` the evaluator sees is the same shape the
7 : // engine sees at runtime. The tests cover each shape family the
8 : // classifier promotes to the semantic executor.
9 :
10 : #include "backend/engine/semantic/array_struct/array_scan.h"
11 :
12 : #include <cstdint>
13 : #include <memory>
14 : #include <string>
15 : #include <vector>
16 :
17 : #include "absl/status/status.h"
18 : #include "absl/status/statusor.h"
19 : #include "backend/engine/semantic/error.h"
20 : #include "backend/engine/semantic/eval_expr.h"
21 : #include "googlesql/public/analyzer.h"
22 : #include "googlesql/public/analyzer_options.h"
23 : #include "googlesql/public/analyzer_output.h"
24 : #include "googlesql/public/builtin_function_options.h"
25 : #include "googlesql/public/catalog.h"
26 : #include "googlesql/public/language_options.h"
27 : #include "googlesql/public/options.pb.h"
28 : #include "googlesql/public/simple_catalog.h"
29 : #include "googlesql/public/types/type_factory.h"
30 : #include "googlesql/resolved_ast/resolved_ast.h"
31 : #include "googlesql/resolved_ast/resolved_node_kind.pb.h"
32 : #include "gtest/gtest.h"
33 :
34 : namespace bigquery_emulator {
35 : namespace backend {
36 : namespace engine {
37 : namespace semantic {
38 : namespace array_struct {
39 : namespace {
40 :
41 8 : ::googlesql::AnalyzerOptions MakeAnalyzerOptions() {
42 8 : ::googlesql::LanguageOptions language;
43 8 : language.EnableMaximumLanguageFeatures();
44 8 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
45 8 : ::googlesql::AnalyzerOptions options(language);
46 8 : options.CreateDefaultArenasIfNotSet();
47 8 : return options;
48 8 : }
49 :
50 : class ArrayScanTest : public ::testing::Test {
51 : protected:
52 8 : void SetUp() override {
53 8 : type_factory_ = std::make_unique<::googlesql::TypeFactory>();
54 8 : catalog_ = std::make_unique<::googlesql::SimpleCatalog>(
55 8 : "exec_catalog", type_factory_.get());
56 8 : catalog_->AddBuiltinFunctions(
57 8 : ::googlesql::BuiltinFunctionOptions::AllReleasedFunctions());
58 8 : }
59 :
60 8 : const ::googlesql::ResolvedStatement* Analyze(absl::string_view sql) {
61 8 : last_output_.reset();
62 8 : absl::Status s = ::googlesql::AnalyzeStatement(sql,
63 8 : MakeAnalyzerOptions(),
64 8 : catalog_.get(),
65 8 : type_factory_.get(),
66 8 : &last_output_);
67 16 : EXPECT_TRUE(s.ok()) << s;
68 8 : if (!s.ok() || last_output_ == nullptr) return nullptr;
69 8 : return last_output_->resolved_statement();
70 8 : }
71 :
72 : // Walk the analyzed query down to the inner ResolvedArrayScan
73 : // (the executor's `ResolvedProjectScan(input_scan=
74 : // ResolvedArrayScan(...))` shape).
75 : const ::googlesql::ResolvedArrayScan* AnalyzeArrayScan(
76 8 : absl::string_view sql) {
77 8 : const auto* stmt = Analyze(sql);
78 8 : if (stmt == nullptr ||
79 8 : stmt->node_kind() != ::googlesql::RESOLVED_QUERY_STMT) {
80 0 : return nullptr;
81 0 : }
82 8 : const auto& q = *stmt->GetAs<::googlesql::ResolvedQueryStmt>();
83 8 : const ::googlesql::ResolvedScan* query = q.query();
84 8 : if (query == nullptr ||
85 8 : query->node_kind() != ::googlesql::RESOLVED_PROJECT_SCAN) {
86 0 : return nullptr;
87 0 : }
88 8 : const auto& p = *query->GetAs<::googlesql::ResolvedProjectScan>();
89 8 : if (p.input_scan() == nullptr ||
90 8 : p.input_scan()->node_kind() != ::googlesql::RESOLVED_ARRAY_SCAN) {
91 0 : return nullptr;
92 0 : }
93 8 : return p.input_scan()->GetAs<::googlesql::ResolvedArrayScan>();
94 8 : }
95 :
96 : std::unique_ptr<::googlesql::TypeFactory> type_factory_{};
97 : std::unique_ptr<::googlesql::SimpleCatalog> catalog_{};
98 : std::unique_ptr<const ::googlesql::AnalyzerOutput> last_output_{};
99 : };
100 :
101 1 : TEST_F(ArrayScanTest, StandaloneUnnestEnumeratesElements) {
102 : // Even without WITH OFFSET, the executor walks
103 : // `ResolvedArrayScan` for the property-promoted shapes. Pin the
104 : // baseline: a 3-element literal array emits 3 rows with the
105 : // element column bound row-at-a-time.
106 1 : const auto* scan =
107 1 : AnalyzeArrayScan("SELECT n FROM UNNEST([10, 20, 30]) AS n");
108 1 : ASSERT_NE(scan, nullptr);
109 1 : EvalContext ctx;
110 1 : auto rows = EvaluateArrayScan(*scan, ctx);
111 2 : ASSERT_TRUE(rows.ok()) << rows.status();
112 1 : ASSERT_EQ(rows->size(), 3u);
113 1 : const int col_id = scan->element_column_list(0).column_id();
114 1 : EXPECT_EQ((*rows)[0].at(col_id).int64_value(), 10);
115 1 : EXPECT_EQ((*rows)[1].at(col_id).int64_value(), 20);
116 1 : EXPECT_EQ((*rows)[2].at(col_id).int64_value(), 30);
117 1 : }
118 :
119 1 : TEST_F(ArrayScanTest, UnnestWithOffsetBindsOffsetColumn) {
120 : // `UNNEST(...) WITH OFFSET AS idx` produces one row per element
121 : // with two bindings: the element column and the (0-based)
122 : // offset column. This is // `docs/ENGINE_POLICY.md`.
123 1 : const auto* scan = AnalyzeArrayScan(
124 1 : "SELECT n, idx FROM UNNEST(['a', 'b', 'c']) AS n WITH OFFSET AS idx");
125 1 : ASSERT_NE(scan, nullptr);
126 1 : ASSERT_NE(scan->array_offset_column(), nullptr);
127 1 : EvalContext ctx;
128 1 : auto rows = EvaluateArrayScan(*scan, ctx);
129 2 : ASSERT_TRUE(rows.ok()) << rows.status();
130 1 : ASSERT_EQ(rows->size(), 3u);
131 1 : const int n_col = scan->element_column_list(0).column_id();
132 1 : const int idx_col = scan->array_offset_column()->column().column_id();
133 1 : EXPECT_EQ((*rows)[0].at(n_col).string_value(), "a");
134 1 : EXPECT_EQ((*rows)[0].at(idx_col).int64_value(), 0);
135 1 : EXPECT_EQ((*rows)[1].at(n_col).string_value(), "b");
136 1 : EXPECT_EQ((*rows)[1].at(idx_col).int64_value(), 1);
137 1 : EXPECT_EQ((*rows)[2].at(n_col).string_value(), "c");
138 1 : EXPECT_EQ((*rows)[2].at(idx_col).int64_value(), 2);
139 1 : }
140 :
141 1 : TEST_F(ArrayScanTest, EmptyArrayInnerProducesZeroRows) {
142 : // BigQuery contract: `FROM UNNEST([])` (inner UNNEST on empty
143 : // array) emits zero rows. DuckDB does the same on the standalone
144 : // shape; the test pins that the semantic executor matches.
145 1 : const auto* scan = AnalyzeArrayScan(
146 1 : "SELECT n, idx FROM UNNEST(CAST([] AS ARRAY<INT64>)) AS n WITH OFFSET "
147 1 : "AS idx");
148 1 : ASSERT_NE(scan, nullptr);
149 1 : EvalContext ctx;
150 1 : auto rows = EvaluateArrayScan(*scan, ctx);
151 2 : ASSERT_TRUE(rows.ok()) << rows.status();
152 1 : EXPECT_EQ(rows->size(), 0u);
153 1 : }
154 :
155 : // Note on `is_outer` coverage:
156 : //
157 : // `is_outer == true` is set by GoogleSQL only when the
158 : // `ResolvedArrayScan` is the right-hand side of a
159 : // `LEFT JOIN UNNEST(...)` shape, which inherently requires a
160 : // non-trivial `input_scan` (correlated). That shape is gated by
161 : // `docs/ENGINE_POLICY.md`: until the
162 : // correlated input-scan evaluator lands, `EvaluateArrayScan`
163 : // surfaces `kNotImplemented`, so a syntactic `is_outer=true`
164 : // fixture cannot exercise the empty-array NULL-row branch yet.
165 : //
166 : // The branch's implementation (`row_count == 0 && scan.is_outer()`
167 : // -> emit single bindings row with NULL element and NULL offset)
168 : // is in `array_scan.cc`; Family 4's tests will pin it via a
169 : // `LEFT JOIN UNNEST(t.arr) WITH OFFSET AS idx` fixture when
170 : // the correlated path ships.
171 :
172 1 : TEST_F(ArrayScanTest, MultiArrayUnnestPadProducesPaddedRows) {
173 : // Family 3: multi-array zip with PAD (the default). The shorter
174 : // array pads with NULL element values; the longest determines
175 : // the row count.
176 1 : const auto* scan =
177 1 : AnalyzeArrayScan("SELECT * FROM UNNEST([1, 2, 3], [10, 20])");
178 1 : ASSERT_NE(scan, nullptr);
179 1 : ASSERT_GT(scan->array_expr_list_size(), 1);
180 1 : EvalContext ctx;
181 1 : auto rows = EvaluateArrayScan(*scan, ctx);
182 2 : ASSERT_TRUE(rows.ok()) << rows.status();
183 1 : ASSERT_EQ(rows->size(), 3u);
184 1 : const int a_col = scan->element_column_list(0).column_id();
185 1 : const int b_col = scan->element_column_list(1).column_id();
186 1 : EXPECT_EQ((*rows)[0].at(a_col).int64_value(), 1);
187 1 : EXPECT_EQ((*rows)[0].at(b_col).int64_value(), 10);
188 1 : EXPECT_EQ((*rows)[1].at(a_col).int64_value(), 2);
189 1 : EXPECT_EQ((*rows)[1].at(b_col).int64_value(), 20);
190 1 : EXPECT_EQ((*rows)[2].at(a_col).int64_value(), 3);
191 1 : EXPECT_TRUE((*rows)[2].at(b_col).is_null());
192 1 : }
193 :
194 1 : TEST_F(ArrayScanTest, MultiArrayUnnestTruncateDropsTail) {
195 : // The TRUNCATE mode caps row count at the shortest array.
196 1 : const auto* scan = AnalyzeArrayScan(
197 1 : "SELECT * FROM UNNEST([1, 2, 3], [10, 20], mode => 'TRUNCATE')");
198 1 : ASSERT_NE(scan, nullptr);
199 1 : EvalContext ctx;
200 1 : auto rows = EvaluateArrayScan(*scan, ctx);
201 2 : ASSERT_TRUE(rows.ok()) << rows.status();
202 1 : EXPECT_EQ(rows->size(), 2u);
203 1 : }
204 :
205 1 : TEST_F(ArrayScanTest, MultiArrayUnnestStrictRejectsMismatchedLengths) {
206 : // STRICT requires equal lengths; mismatched -> structured
207 : // INVALID_ARGUMENT.
208 1 : const auto* scan = AnalyzeArrayScan(
209 1 : "SELECT * FROM UNNEST([1, 2, 3], [10, 20], mode => 'STRICT')");
210 1 : ASSERT_NE(scan, nullptr);
211 1 : EvalContext ctx;
212 1 : auto rows = EvaluateArrayScan(*scan, ctx);
213 1 : ASSERT_FALSE(rows.ok());
214 1 : EXPECT_EQ(rows.status().code(), absl::StatusCode::kInvalidArgument);
215 1 : EXPECT_EQ(GetSemanticErrorReason(rows.status()),
216 1 : SemanticErrorReason::kInvalidArgument);
217 1 : }
218 :
219 : TEST_F(ArrayScanTest,
220 1 : CorrelatedInputScanWithoutBindingsSurfacesNotImplemented) {
221 : // `EvaluateArrayScan` is the inner evaluator; `MaterializeArrayScan`
222 : // binds outer rows before calling it. A direct call without
223 : // `parent_ctx.columns` must not silently approximate.
224 1 : const ::googlesql::Type* int64_array = nullptr;
225 1 : ASSERT_TRUE(
226 1 : type_factory_->MakeArrayType(type_factory_->get_int64(), &int64_array)
227 1 : .ok());
228 1 : auto arr_tab = std::make_unique<::googlesql::SimpleTable>(
229 1 : "arr_tab",
230 1 : std::vector<::googlesql::SimpleTable::NameAndType>{
231 1 : {"id", type_factory_->get_int64()},
232 1 : {"arr", int64_array},
233 1 : });
234 1 : catalog_->AddOwnedTable(std::move(arr_tab));
235 1 : const auto* scan =
236 1 : AnalyzeArrayScan("SELECT id, n FROM arr_tab, UNNEST(arr_tab.arr) AS n");
237 1 : ASSERT_NE(scan, nullptr);
238 1 : EvalContext ctx;
239 1 : auto rows = EvaluateArrayScan(*scan, ctx);
240 1 : ASSERT_FALSE(rows.ok());
241 1 : EXPECT_EQ(rows.status().code(), absl::StatusCode::kUnimplemented);
242 1 : }
243 :
244 1 : TEST_F(ArrayScanTest, CorrelatedInputScanWithOuterBindingsUnnestsPerRow) {
245 1 : const ::googlesql::Type* int64_array = nullptr;
246 1 : ASSERT_TRUE(
247 1 : type_factory_->MakeArrayType(type_factory_->get_int64(), &int64_array)
248 1 : .ok());
249 1 : auto arr_tab = std::make_unique<::googlesql::SimpleTable>(
250 1 : "arr_tab",
251 1 : std::vector<::googlesql::SimpleTable::NameAndType>{
252 1 : {"id", type_factory_->get_int64()},
253 1 : {"arr", int64_array},
254 1 : });
255 1 : catalog_->AddOwnedTable(std::move(arr_tab));
256 1 : const auto* scan =
257 1 : AnalyzeArrayScan("SELECT id, n FROM arr_tab, UNNEST(arr_tab.arr) AS n");
258 1 : ASSERT_NE(scan, nullptr);
259 1 : ColumnBindings outer;
260 1 : outer.emplace(scan->input_scan()->column_list(0).column_id(),
261 1 : ::googlesql::Value::Int64(7));
262 1 : outer.emplace(scan->input_scan()->column_list(1).column_id(),
263 1 : ::googlesql::Value::Array(int64_array->AsArray(),
264 1 : {::googlesql::Value::Int64(10),
265 1 : ::googlesql::Value::Int64(20)}));
266 1 : EvalContext ctx;
267 1 : ctx.columns = &outer;
268 1 : auto rows = EvaluateArrayScan(*scan, ctx);
269 2 : ASSERT_TRUE(rows.ok()) << rows.status();
270 1 : ASSERT_EQ(rows->size(), 2u);
271 1 : const int n_col = scan->element_column_list(0).column_id();
272 1 : EXPECT_EQ((*rows)[0].at(n_col).int64_value(), 10);
273 1 : EXPECT_EQ((*rows)[1].at(n_col).int64_value(), 20);
274 1 : }
275 :
276 : } // namespace
277 : } // namespace array_struct
278 : } // namespace semantic
279 : } // namespace engine
280 : } // namespace backend
281 : } // namespace bigquery_emulator
|