Line data Source code
// Unit tests for `SemanticExecutor`.
//
// We drive a real `AnalyzeStatement` against a tiny `SimpleCatalog`
// (mirroring the conformance harness used in
// `route_classifier_test.cc` / `stub_executors_test.cc`) and run
// `SemanticExecutor::ExecuteQuery` over the analyzer's resolved
// statement directly. The tests pin the end-to-end happy paths
// (scalar SELECT + arithmetic + parameter binding, plus the
// barrier-scan, UNNEST, and chained-CTE shapes) and the
// error-surface mappings the gateway depends on
// (`SELECT 1.0 / 0 -> divisionByZero`, `SELECT INT64_MAX + 1 ->
// overflow`).
12 :
13 : #include "backend/engine/semantic/executor.h"
14 :
15 : #include <algorithm>
16 : #include <memory>
17 : #include <string>
18 : #include <vector>
19 :
20 : #include "absl/status/status.h"
21 : #include "absl/status/statusor.h"
22 : #include "backend/engine/engine.h"
23 : #include "backend/engine/semantic/error.h"
24 : #include "backend/storage/storage.h"
25 : #include "googlesql/public/analyzer.h"
26 : #include "googlesql/public/analyzer_options.h"
27 : #include "googlesql/public/analyzer_output.h"
28 : #include "googlesql/public/builtin_function_options.h"
29 : #include "googlesql/public/catalog.h"
30 : #include "googlesql/public/id_string.h"
31 : #include "googlesql/public/language_options.h"
32 : #include "googlesql/public/options.pb.h"
33 : #include "googlesql/public/simple_catalog.h"
34 : #include "googlesql/public/types/type_factory.h"
35 : #include "googlesql/public/value.h"
36 : #include "googlesql/resolved_ast/resolved_ast.h"
37 : #include "googlesql/resolved_ast/resolved_column.h"
38 : #include "gtest/gtest.h"
39 :
40 : namespace bigquery_emulator {
41 : namespace backend {
42 : namespace engine {
43 : namespace semantic {
44 : namespace {
45 :
46 15 : ::googlesql::AnalyzerOptions MakeAnalyzerOptions() {
47 15 : ::googlesql::LanguageOptions language;
48 15 : language.EnableMaximumLanguageFeatures();
49 15 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
50 15 : ::googlesql::AnalyzerOptions options(language);
51 15 : options.CreateDefaultArenasIfNotSet();
52 15 : return options;
53 15 : }
54 :
55 : class SemanticExecutorTest : public ::testing::Test {
56 : protected:
57 16 : void SetUp() override {
58 16 : type_factory_ = std::make_unique<::googlesql::TypeFactory>();
59 16 : catalog_ = std::make_unique<::googlesql::SimpleCatalog>(
60 16 : "exec_catalog", type_factory_.get());
61 16 : catalog_->AddBuiltinFunctions(
62 16 : ::googlesql::BuiltinFunctionOptions::AllReleasedFunctions());
63 16 : }
64 :
65 : const ::googlesql::ResolvedStatement* Analyze(
66 15 : absl::string_view sql, const ::googlesql::AnalyzerOptions& options) {
67 15 : last_output_.reset();
68 15 : absl::Status s = ::googlesql::AnalyzeStatement(
69 15 : sql, options, catalog_.get(), type_factory_.get(), &last_output_);
70 30 : EXPECT_TRUE(s.ok()) << s;
71 15 : if (!s.ok() || last_output_ == nullptr) return nullptr;
72 15 : return last_output_->resolved_statement();
73 15 : }
74 :
75 16 : QueryRequest MakeRequest(absl::string_view sql) {
76 16 : QueryRequest req;
77 16 : req.project_id = "test-project";
78 16 : req.sql = std::string(sql);
79 16 : return req;
80 16 : }
81 :
82 : // Drain a single-row output and return the first cell.
83 : absl::StatusOr<storage::Value> RunForFirstCell(
84 : const std::string& sql,
85 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions(),
86 4 : QueryRequest req = QueryRequest{}) {
87 4 : const auto* stmt = Analyze(sql, options);
88 4 : if (stmt == nullptr) return absl::InternalError("analyzer failed");
89 4 : if (req.sql.empty()) req = MakeRequest(sql);
90 4 : SemanticExecutor exec;
91 4 : auto source = exec.ExecuteQuery(req, *stmt, catalog_.get());
92 4 : if (!source.ok()) return source.status();
93 4 : storage::Row row;
94 4 : auto has = (*source)->Next(&row);
95 4 : if (!has.ok()) return has.status();
96 4 : if (!*has) return absl::InternalError("executor returned no rows");
97 4 : if (row.cells.empty()) return absl::InternalError("row has no cells");
98 4 : return row.cells[0];
99 4 : }
100 :
101 : std::unique_ptr<::googlesql::TypeFactory> type_factory_{};
102 : std::unique_ptr<::googlesql::SimpleCatalog> catalog_{};
103 : std::unique_ptr<const ::googlesql::AnalyzerOutput> last_output_{};
104 : };
105 :
106 1 : TEST_F(SemanticExecutorTest, ScalarSelectOneRoundTrips) {
107 1 : auto cell = RunForFirstCell("SELECT 1");
108 2 : ASSERT_TRUE(cell.ok()) << cell.status();
109 1 : EXPECT_EQ(cell->int64_value(), 1);
110 1 : }
111 :
112 1 : TEST_F(SemanticExecutorTest, ScalarSelectArithmeticRoundTrips) {
113 1 : auto cell = RunForFirstCell("SELECT 1 + 2");
114 2 : ASSERT_TRUE(cell.ok()) << cell.status();
115 1 : EXPECT_EQ(cell->int64_value(), 3);
116 1 : }
117 :
118 1 : TEST_F(SemanticExecutorTest, ScalarSelectMultipleColumnsRoundTrip) {
119 1 : const auto* stmt = Analyze("SELECT 1 AS a, 'x' AS b", MakeAnalyzerOptions());
120 1 : ASSERT_NE(stmt, nullptr);
121 1 : SemanticExecutor exec;
122 1 : auto source =
123 1 : exec.ExecuteQuery(MakeRequest("SELECT 1, 'x'"), *stmt, catalog_.get());
124 2 : ASSERT_TRUE(source.ok()) << source.status();
125 1 : ASSERT_EQ((*source)->schema().columns.size(), 2u);
126 1 : EXPECT_EQ((*source)->schema().columns[0].name, "a");
127 1 : EXPECT_EQ((*source)->schema().columns[1].name, "b");
128 1 : storage::Row row;
129 1 : auto has = (*source)->Next(&row);
130 2 : ASSERT_TRUE(has.ok()) << has.status();
131 1 : ASSERT_TRUE(*has);
132 1 : ASSERT_EQ(row.cells.size(), 2u);
133 1 : EXPECT_EQ(row.cells[0].int64_value(), 1);
134 1 : EXPECT_EQ(row.cells[1].string_value(), "x");
135 1 : has = (*source)->Next(&row);
136 2 : ASSERT_TRUE(has.ok()) << has.status();
137 1 : EXPECT_FALSE(*has);
138 1 : }
139 :
140 1 : TEST_F(SemanticExecutorTest, NullAdditionPropagatesNull) {
141 1 : auto cell = RunForFirstCell("SELECT CAST(NULL AS INT64) + 1");
142 2 : ASSERT_TRUE(cell.ok()) << cell.status();
143 1 : EXPECT_TRUE(cell->is_null());
144 1 : }
145 :
146 1 : TEST_F(SemanticExecutorTest, DivisionByZeroSurfacesReason) {
147 1 : const auto* stmt = Analyze("SELECT 1.0 / 0", MakeAnalyzerOptions());
148 1 : ASSERT_NE(stmt, nullptr);
149 1 : SemanticExecutor exec;
150 1 : auto source =
151 1 : exec.ExecuteQuery(MakeRequest("SELECT 1.0 / 0"), *stmt, catalog_.get());
152 1 : ASSERT_FALSE(source.ok());
153 1 : EXPECT_EQ(source.status().code(), absl::StatusCode::kInvalidArgument);
154 1 : EXPECT_EQ(GetSemanticErrorReason(source.status()),
155 1 : SemanticErrorReason::kDivisionByZero);
156 1 : }
157 :
158 1 : TEST_F(SemanticExecutorTest, Int64OverflowSurfacesReason) {
159 1 : const auto* stmt =
160 1 : Analyze("SELECT 9223372036854775807 + 1", MakeAnalyzerOptions());
161 1 : ASSERT_NE(stmt, nullptr);
162 1 : SemanticExecutor exec;
163 1 : auto source = exec.ExecuteQuery(
164 1 : MakeRequest("SELECT 9223372036854775807 + 1"), *stmt, catalog_.get());
165 1 : ASSERT_FALSE(source.ok());
166 1 : EXPECT_EQ(GetSemanticErrorReason(source.status()),
167 1 : SemanticErrorReason::kOverflow);
168 1 : }
169 :
170 1 : TEST_F(SemanticExecutorTest, SafeAddOverflowProducesNull) {
171 1 : auto cell = RunForFirstCell("SELECT SAFE_ADD(9223372036854775807, 1)");
172 2 : ASSERT_TRUE(cell.ok()) << cell.status();
173 1 : EXPECT_TRUE(cell->is_null());
174 1 : }
175 :
176 1 : TEST_F(SemanticExecutorTest, NamedParameterBindsAndArithmeticUses) {
177 1 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
178 1 : ASSERT_TRUE(
179 1 : options.AddQueryParameter("p", ::googlesql::types::Int64Type()).ok());
180 :
181 1 : const auto* stmt = Analyze("SELECT @p + 1", options);
182 1 : ASSERT_NE(stmt, nullptr);
183 1 : QueryRequest req = MakeRequest("SELECT @p + 1");
184 1 : QueryParameter p;
185 1 : p.name = "p";
186 1 : p.type_kind = "INT64";
187 1 : p.value_json = "40";
188 1 : req.parameters.push_back(p);
189 :
190 1 : SemanticExecutor exec;
191 1 : auto source = exec.ExecuteQuery(req, *stmt, catalog_.get());
192 2 : ASSERT_TRUE(source.ok()) << source.status();
193 1 : storage::Row row;
194 1 : auto has = (*source)->Next(&row);
195 2 : ASSERT_TRUE(has.ok()) << has.status();
196 1 : ASSERT_TRUE(*has);
197 1 : EXPECT_EQ(row.cells[0].int64_value(), 41);
198 1 : }
199 :
200 1 : TEST_F(SemanticExecutorTest, RejectsSelectWithFromShape) {
201 : // Add a fake table to the catalog so the analyzer can resolve
202 : // the FROM clause; the executor should still reject the shape.
203 1 : ::googlesql::SimpleTable* table =
204 1 : new ::googlesql::SimpleTable("t", {{"x", type_factory_->get_int64()}});
205 1 : catalog_->AddOwnedTable(table);
206 1 : const auto* stmt = Analyze("SELECT x FROM t", MakeAnalyzerOptions());
207 1 : ASSERT_NE(stmt, nullptr);
208 1 : SemanticExecutor exec;
209 1 : auto source =
210 1 : exec.ExecuteQuery(MakeRequest("SELECT x FROM t"), *stmt, catalog_.get());
211 1 : ASSERT_FALSE(source.ok());
212 1 : EXPECT_EQ(source.status().code(), absl::StatusCode::kUnimplemented);
213 1 : }
214 :
215 : // `docs/ENGINE_POLICY.md` Family 2. A
216 : // `ResolvedBarrierScan` wrapping a SingleRowScan is the
217 : // pipe-operator analog of `SELECT 1 + 2`; the barrier is the
218 : // analyzer's pipe-boundary marker and rows pass through
219 : // unchanged. `StripBarrierScans` peels the wrapper before
220 : // dispatch so the scalar-only evaluator handles the projection.
221 1 : TEST_F(SemanticExecutorTest, BarrierScanOverSingleRowPassesThrough) {
222 : // Direct construction: the surface SQL `SELECT 1 + 2 |> SELECT ...`
223 : // is not yet enabled in this fixture's analyzer, but the
224 : // `ResolvedQueryStmt(query=ResolvedProjectScan(input_scan=
225 : // ResolvedBarrierScan(input_scan=ResolvedSingleRowScan)))`
226 : // shape is what the analyzer would emit, so we build it
227 : // directly and feed it to the executor.
228 1 : auto single = ::googlesql::MakeResolvedSingleRowScan();
229 1 : auto barrier = ::googlesql::MakeResolvedBarrierScan(
230 1 : /*column_list=*/{}, std::move(single));
231 : // Project a literal 7 onto a fresh output column.
232 1 : ::googlesql::ResolvedColumn out_col(
233 1 : /*column_id=*/100,
234 1 : /*table_name=*/::googlesql::IdString::MakeGlobal("$query"),
235 1 : /*name=*/::googlesql::IdString::MakeGlobal("c"),
236 1 : type_factory_->get_int64());
237 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedComputedColumn>> exprs;
238 1 : exprs.push_back(::googlesql::MakeResolvedComputedColumn(
239 1 : out_col, ::googlesql::MakeResolvedLiteral(::googlesql::Value::Int64(7))));
240 1 : auto project = ::googlesql::MakeResolvedProjectScan(
241 1 : /*column_list=*/{out_col}, std::move(exprs), std::move(barrier));
242 1 : std::vector<std::unique_ptr<const ::googlesql::ResolvedOutputColumn>> outputs;
243 1 : outputs.push_back(
244 1 : ::googlesql::MakeResolvedOutputColumn(/*name=*/"c", out_col));
245 1 : auto query_stmt = ::googlesql::MakeResolvedQueryStmt(
246 1 : std::move(outputs), /*is_value_table=*/false, std::move(project));
247 :
248 1 : SemanticExecutor exec;
249 1 : QueryRequest req = MakeRequest("/* barrier shape; built directly */");
250 1 : auto source = exec.ExecuteQuery(req, *query_stmt, catalog_.get());
251 2 : ASSERT_TRUE(source.ok()) << source.status();
252 1 : storage::Row row;
253 1 : auto has = (*source)->Next(&row);
254 2 : ASSERT_TRUE(has.ok()) << has.status();
255 1 : ASSERT_TRUE(*has);
256 1 : ASSERT_EQ(row.cells.size(), 1u);
257 1 : EXPECT_EQ(row.cells[0].int64_value(), 7);
258 1 : }
259 :
260 1 : TEST_F(SemanticExecutorTest, UnnestWithOffsetEmitsRowPerElement) {
261 : // deferred work tracked in docs/ENGINE_POLICY.md: a
262 : // standalone `UNNEST(...) WITH OFFSET` flowing through the
263 : // semantic executor produces one row per element with two
264 : // columns (the element value + the 0-based offset).
265 1 : const std::string sql =
266 1 : "SELECT n, idx FROM UNNEST(['a', 'b', 'c']) AS n WITH OFFSET AS idx";
267 1 : const auto* stmt = Analyze(sql, MakeAnalyzerOptions());
268 1 : ASSERT_NE(stmt, nullptr);
269 1 : SemanticExecutor exec;
270 1 : auto source = exec.ExecuteQuery(MakeRequest(sql), *stmt, catalog_.get());
271 2 : ASSERT_TRUE(source.ok()) << source.status();
272 1 : ASSERT_EQ((*source)->schema().columns.size(), 2u);
273 1 : EXPECT_EQ((*source)->schema().columns[0].name, "n");
274 1 : EXPECT_EQ((*source)->schema().columns[1].name, "idx");
275 :
276 1 : storage::Row row;
277 4 : for (int i = 0; i < 3; ++i) {
278 3 : auto has = (*source)->Next(&row);
279 6 : ASSERT_TRUE(has.ok()) << has.status();
280 6 : ASSERT_TRUE(*has) << "expected row #" << i;
281 3 : ASSERT_EQ(row.cells.size(), 2u);
282 3 : EXPECT_EQ(row.cells[1].int64_value(), i);
283 3 : }
284 : // Confirm the stream ends after 3 elements.
285 1 : auto has = (*source)->Next(&row);
286 2 : ASSERT_TRUE(has.ok()) << has.status();
287 1 : EXPECT_FALSE(*has);
288 1 : }
289 :
290 1 : TEST_F(SemanticExecutorTest, OuterUnnestEmptyArrayEmitsNullRow) {
291 : // Family 2: an empty array under `is_outer=true` (the analyzer
292 : // synthesizes this for the `LEFT JOIN UNNEST(...) ON TRUE`
293 : // pattern, and for `WITH OFFSET` against an empty literal) emits
294 : // a single row whose element + offset are both NULL.
295 1 : const std::string sql =
296 1 : "SELECT n, idx FROM UNNEST(CAST([] AS ARRAY<INT64>)) AS n "
297 1 : "WITH OFFSET AS idx";
298 1 : const auto* stmt = Analyze(sql, MakeAnalyzerOptions());
299 1 : if (stmt == nullptr) {
300 0 : GTEST_SKIP() << "analyzer rejected empty-array literal; "
301 0 : "covered by array_scan_test.";
302 0 : }
303 1 : SemanticExecutor exec;
304 1 : auto source = exec.ExecuteQuery(MakeRequest(sql), *stmt, catalog_.get());
305 2 : ASSERT_TRUE(source.ok()) << source.status();
306 1 : storage::Row row;
307 1 : auto has = (*source)->Next(&row);
308 2 : ASSERT_TRUE(has.ok()) << has.status();
309 : // Inner UNNEST against empty array emits zero rows; outer would
310 : // emit one NULL row. `WITH OFFSET` without `is_outer` is inner.
311 1 : EXPECT_FALSE(*has);
312 1 : }
313 :
314 1 : TEST_F(SemanticExecutorTest, DmlSurfacesNotImplemented) {
315 1 : const auto* stmt = Analyze("SELECT 1", MakeAnalyzerOptions());
316 1 : ASSERT_NE(stmt, nullptr);
317 1 : SemanticExecutor exec;
318 1 : auto out = exec.ExecuteDml(MakeRequest("SELECT 1"), *stmt, catalog_.get());
319 1 : ASSERT_FALSE(out.ok());
320 1 : EXPECT_EQ(out.status().code(), absl::StatusCode::kUnimplemented);
321 1 : }
322 :
323 1 : TEST_F(SemanticExecutorTest, DdlSurfacesNotImplemented) {
324 1 : const auto* stmt = Analyze("SELECT 1", MakeAnalyzerOptions());
325 1 : ASSERT_NE(stmt, nullptr);
326 1 : SemanticExecutor exec;
327 1 : absl::Status out =
328 1 : exec.ExecuteDdl(MakeRequest("SELECT 1"), *stmt, catalog_.get());
329 1 : ASSERT_FALSE(out.ok());
330 1 : EXPECT_EQ(out.code(), absl::StatusCode::kUnimplemented);
331 1 : }
332 :
333 1 : TEST_F(SemanticExecutorTest, ChainedCteReferencesPriorEntry) {
334 1 : const std::string sql =
335 1 : "WITH base AS (SELECT 1 AS n UNION ALL SELECT 2 AS n), "
336 1 : " doubled AS (SELECT n * 2 AS m FROM base) "
337 1 : "SELECT SUM(m) AS total FROM doubled";
338 1 : const auto* stmt = Analyze(sql, MakeAnalyzerOptions());
339 1 : ASSERT_NE(stmt, nullptr);
340 1 : SemanticExecutor exec;
341 1 : auto source = exec.ExecuteQuery(MakeRequest(sql), *stmt, catalog_.get());
342 2 : ASSERT_TRUE(source.ok()) << source.status();
343 1 : storage::Row row;
344 1 : auto has = (*source)->Next(&row);
345 2 : ASSERT_TRUE(has.ok()) << has.status();
346 1 : ASSERT_TRUE(*has);
347 1 : ASSERT_EQ(row.cells.size(), 1u);
348 1 : EXPECT_EQ(row.cells[0].int64_value(), 6);
349 1 : }
350 :
351 1 : TEST_F(SemanticExecutorTest, ChainedCteWithRowNumberAnalyticScan) {
352 1 : const std::string sql =
353 1 : "WITH base AS (SELECT 1 AS id UNION ALL SELECT 1 AS id), "
354 1 : " ranked AS ("
355 1 : " SELECT id, ROW_NUMBER() OVER (PARTITION BY id ORDER BY id) AS rn "
356 1 : " FROM base"
357 1 : " ) "
358 1 : "SELECT COUNT(*) AS c FROM ranked WHERE rn = 1";
359 1 : const auto* stmt = Analyze(sql, MakeAnalyzerOptions());
360 1 : ASSERT_NE(stmt, nullptr);
361 1 : SemanticExecutor exec;
362 1 : auto source = exec.ExecuteQuery(MakeRequest(sql), *stmt, catalog_.get());
363 2 : ASSERT_TRUE(source.ok()) << source.status();
364 1 : storage::Row row;
365 1 : auto has = (*source)->Next(&row);
366 2 : ASSERT_TRUE(has.ok()) << has.status();
367 1 : ASSERT_TRUE(*has);
368 1 : ASSERT_EQ(row.cells.size(), 1u);
369 1 : EXPECT_EQ(row.cells[0].int64_value(), 1);
370 1 : }
371 :
372 : } // namespace
373 : } // namespace semantic
374 : } // namespace engine
375 : } // namespace backend
376 : } // namespace bigquery_emulator
|