Line data Source code
1 : // Per-handler unit tests for the control-op executor. Each test
2 : // drives a single `Resolved*Stmt` shape directly through the
3 : // executor's `ExecuteDdl` surface (the same surface the coordinator
4 : // dispatches to for `kControlOp` rows in
5 : // `node_dispositions.yaml`) and asserts the catalog mutation lands
6 : // on the underlying `Storage` backend.
7 : //
8 : // We deliberately avoid the gateway path here -- the gateway's
9 : // statementType envelope is exercised by `gateway/e2e/` integration
10 : // tests. This file is the per-handler "did the storage row change"
11 : // pin; it lets a regression in the handler's name-path resolution,
12 : // schema mapping, or storage-write call surface as a unit-test
13 : // failure first.
14 : //
15 : // Plan ownership: `docs/ENGINE_POLICY.md` Tests
16 : // section.
17 :
18 : #include "backend/engine/control/control_op_executor.h"
19 :
20 : #include <cstdint>
21 : #include <cstdlib>
22 : #include <filesystem>
23 : #include <memory>
24 : #include <random>
25 : #include <string>
26 : #include <system_error>
27 : #include <utility>
28 : #include <vector>
29 :
30 : #include "absl/status/status.h"
31 : #include "absl/status/statusor.h"
32 : #include "absl/strings/match.h"
33 : #include "absl/strings/str_cat.h"
34 : #include "absl/strings/string_view.h"
35 : #include "backend/catalog/googlesql_catalog.h"
36 : #include "backend/engine/engine.h"
37 : #include "backend/schema/schema.h"
38 : #include "backend/storage/duckdb/duckdb_storage.h"
39 : #include "backend/storage/storage.h"
40 : #include "googlesql/public/analyzer.h"
41 : #include "googlesql/public/analyzer_options.h"
42 : #include "googlesql/public/analyzer_output.h"
43 : #include "googlesql/public/language_options.h"
44 : #include "googlesql/public/options.pb.h"
45 : #include "googlesql/public/types/type_factory.h"
46 : #include "googlesql/resolved_ast/resolved_ast.h"
47 : #include "gtest/gtest.h"
48 :
49 : namespace bigquery_emulator {
50 : namespace backend {
51 : namespace engine {
52 : namespace control {
53 : namespace {
54 :
55 : namespace fs = std::filesystem;
56 :
57 40 : ::googlesql::LanguageOptions MakeLanguageOptions() {
58 40 : ::googlesql::LanguageOptions language;
59 40 : language.EnableMaximumLanguageFeatures();
60 40 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
61 40 : language.set_name_resolution_mode(::googlesql::NAME_RESOLUTION_DEFAULT);
62 40 : return language;
63 40 : }
64 :
65 : // Mirrors `LocalCoordinatorEngine`'s analyzer setup with the
66 : // supports-all-statements allowlist flipped on so DDL parses.
67 20 : ::googlesql::AnalyzerOptions MakeAnalyzerOptions() {
68 20 : ::googlesql::AnalyzerOptions options(MakeLanguageOptions());
69 20 : options.set_error_message_mode(::googlesql::ERROR_MESSAGE_ONE_LINE);
70 20 : options.set_attach_error_location_payload(true);
71 20 : options.CreateDefaultArenasIfNotSet();
72 20 : options.mutable_language()->SetSupportsAllStatementKinds();
73 20 : return options;
74 20 : }
75 :
76 : class ControlOpExecutorTest : public ::testing::Test {
77 : protected:
78 17 : void SetUp() override {
79 17 : const char* tmpdir_env = std::getenv("TMPDIR");
80 17 : const std::string tmpdir = tmpdir_env != nullptr ? tmpdir_env : "/tmp";
81 17 : std::random_device rd;
82 17 : std::seed_seq seed{rd(), rd()};
83 17 : std::mt19937_64 rng(seed);
84 17 : data_dir_ =
85 17 : fs::path(tmpdir) / absl::StrCat("bqemu-control-op-test-", rng());
86 17 : std::error_code ec;
87 17 : fs::remove_all(data_dir_, ec);
88 17 : auto opened = storage::duckdb::DuckDBStorage::Open(data_dir_.string());
89 34 : ASSERT_TRUE(opened.ok()) << opened.status();
90 17 : storage_ = std::move(opened).value();
91 17 : executor_ = std::make_unique<ControlOpExecutor>(storage_.get());
92 17 : ASSERT_TRUE(storage_->CreateDataset({"proj-test", "ds"}, "US").ok());
93 17 : }
94 :
95 17 : void TearDown() override {
96 17 : executor_.reset();
97 17 : storage_.reset();
98 17 : std::error_code ec;
99 17 : fs::remove_all(data_dir_, ec);
100 17 : }
101 :
102 20 : QueryRequest MakeRequest(absl::string_view sql) {
103 20 : QueryRequest req;
104 20 : req.project_id = "proj-test";
105 20 : req.sql = std::string(sql);
106 20 : return req;
107 20 : }
108 :
109 : // Two-column people table (id INT64 REQUIRED, name STRING
110 : // NULLABLE). Same fixture the engine tests use so the executor's
111 : // post-mutation schema stays aligned with the engine path's.
112 2 : void CreatePeopleTable() {
113 2 : schema::TableSchema bq_schema;
114 2 : schema::ColumnSchema id;
115 2 : id.name = "id";
116 2 : id.type = schema::ColumnType::kInt64;
117 2 : id.mode = schema::ColumnMode::kRequired;
118 2 : bq_schema.columns.push_back(id);
119 2 : schema::ColumnSchema name;
120 2 : name.name = "name";
121 2 : name.type = schema::ColumnType::kString;
122 2 : name.mode = schema::ColumnMode::kNullable;
123 2 : bq_schema.columns.push_back(name);
124 2 : ASSERT_TRUE(
125 2 : storage_->CreateTable({"proj-test", "ds", "people"}, bq_schema).ok());
126 :
127 6 : auto make_row = [](int64_t id_val, std::string name_val) {
128 6 : storage::Row r;
129 6 : r.cells = {
130 6 : storage::Value::Int64(id_val),
131 6 : storage::Value::String(std::move(name_val)),
132 6 : };
133 6 : return r;
134 6 : };
135 2 : std::vector<storage::Row> rows = {
136 2 : make_row(1, "ada"),
137 2 : make_row(2, "linus"),
138 2 : make_row(3, "grace"),
139 2 : };
140 2 : ASSERT_TRUE(storage_
141 2 : ->AppendRows({"proj-test", "ds", "people"},
142 2 : absl::MakeConstSpan(rows))
143 2 : .ok());
144 2 : }
145 :
146 : struct CatalogBundle {
147 : std::unique_ptr<::googlesql::TypeFactory> type_factory{};
148 : std::unique_ptr<catalog::GoogleSqlCatalog> catalog{};
149 : };
150 20 : CatalogBundle MakeCatalog() {
151 20 : auto type_factory = std::make_unique<::googlesql::TypeFactory>();
152 20 : auto catalog = std::make_unique<catalog::GoogleSqlCatalog>(
153 20 : "proj-test", storage_.get(), type_factory.get(), MakeLanguageOptions());
154 20 : return {std::move(type_factory), std::move(catalog)};
155 20 : }
156 :
157 : // Analyze `sql` and run it through `ExecuteDdl`, returning the
158 : // status the executor surfaces. The `AnalyzerOutput` outlives the
159 : // call by virtue of being held in the local; the executor only
160 : // touches the resolved AST during the call.
161 19 : absl::Status RunDdl(absl::string_view sql) {
162 19 : CatalogBundle bundle = MakeCatalog();
163 19 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
164 19 : ::googlesql::TypeFactory type_factory;
165 19 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
166 19 : absl::Status analyze = ::googlesql::AnalyzeStatement(
167 19 : sql, options, bundle.catalog.get(), &type_factory, &output);
168 19 : if (!analyze.ok()) return analyze;
169 19 : if (output == nullptr || output->resolved_statement() == nullptr) {
170 0 : return absl::InternalError(
171 0 : "ControlOpExecutorTest::RunDdl: analyzer produced no resolved "
172 0 : "statement");
173 0 : }
174 19 : return executor_->ExecuteDdl(
175 19 : MakeRequest(sql), *output->resolved_statement(), bundle.catalog.get());
176 19 : }
177 :
178 : fs::path data_dir_{};
179 : std::unique_ptr<storage::duckdb::DuckDBStorage> storage_{};
180 : std::unique_ptr<ControlOpExecutor> executor_{};
181 : };
182 :
183 : // --- CREATE TABLE --------------------------------------------------------
184 :
185 1 : TEST_F(ControlOpExecutorTest, CreateTableWritesSchemaToStorage) {
186 1 : absl::Status s = RunDdl("CREATE TABLE ds.t (id INT64, name STRING)");
187 2 : ASSERT_TRUE(s.ok()) << s;
188 :
189 1 : auto schema = storage_->GetSchema({"proj-test", "ds", "t"});
190 2 : ASSERT_TRUE(schema.ok()) << schema.status();
191 1 : ASSERT_EQ(schema->columns.size(), 2u);
192 1 : EXPECT_EQ(schema->columns[0].name, "id");
193 1 : EXPECT_EQ(schema->columns[0].type, schema::ColumnType::kInt64);
194 1 : EXPECT_EQ(schema->columns[1].name, "name");
195 1 : EXPECT_EQ(schema->columns[1].type, schema::ColumnType::kString);
196 1 : }
197 :
198 1 : TEST_F(ControlOpExecutorTest, CreateTableWithNestedStructColumn) {
199 1 : absl::Status s =
200 1 : RunDdl("CREATE TABLE ds.t (k INT64, s STRUCT<a INT64, b STRING>)");
201 2 : ASSERT_TRUE(s.ok()) << s;
202 :
203 1 : auto schema = storage_->GetSchema({"proj-test", "ds", "t"});
204 2 : ASSERT_TRUE(schema.ok()) << schema.status();
205 1 : ASSERT_EQ(schema->columns.size(), 2u);
206 1 : EXPECT_EQ(schema->columns[0].name, "k");
207 1 : EXPECT_EQ(schema->columns[0].type, schema::ColumnType::kInt64);
208 1 : ASSERT_EQ(schema->columns[1].name, "s");
209 1 : EXPECT_EQ(schema->columns[1].type, schema::ColumnType::kStruct);
210 1 : ASSERT_EQ(schema->columns[1].fields.size(), 2u);
211 1 : EXPECT_EQ(schema->columns[1].fields[0].name, "a");
212 1 : EXPECT_EQ(schema->columns[1].fields[0].type, schema::ColumnType::kInt64);
213 1 : EXPECT_EQ(schema->columns[1].fields[1].name, "b");
214 1 : EXPECT_EQ(schema->columns[1].fields[1].type, schema::ColumnType::kString);
215 1 : }
216 :
217 1 : TEST_F(ControlOpExecutorTest, CreateTableHonoursNotNullAnnotation) {
218 1 : absl::Status s = RunDdl("CREATE TABLE ds.t (id INT64 NOT NULL, name STRING)");
219 2 : ASSERT_TRUE(s.ok()) << s;
220 :
221 1 : auto schema = storage_->GetSchema({"proj-test", "ds", "t"});
222 2 : ASSERT_TRUE(schema.ok()) << schema.status();
223 1 : ASSERT_EQ(schema->columns.size(), 2u);
224 1 : EXPECT_EQ(schema->columns[0].mode, schema::ColumnMode::kRequired);
225 1 : EXPECT_EQ(schema->columns[1].mode, schema::ColumnMode::kNullable);
226 1 : }
227 :
228 1 : TEST_F(ControlOpExecutorTest, CreateTableDuplicateSurfacesAlreadyExists) {
229 1 : ASSERT_TRUE(RunDdl("CREATE TABLE ds.t (id INT64)").ok());
230 1 : absl::Status second = RunDdl("CREATE TABLE ds.t (id INT64)");
231 1 : ASSERT_FALSE(second.ok());
232 2 : EXPECT_EQ(second.code(), absl::StatusCode::kAlreadyExists) << second;
233 1 : }
234 :
235 1 : TEST_F(ControlOpExecutorTest, CreateTableIfNotExistsSwallowsExisting) {
236 1 : ASSERT_TRUE(RunDdl("CREATE TABLE ds.t (id INT64)").ok());
237 1 : absl::Status second = RunDdl("CREATE TABLE IF NOT EXISTS ds.t (id INT64)");
238 2 : EXPECT_TRUE(second.ok()) << second;
239 1 : }
240 :
241 1 : TEST_F(ControlOpExecutorTest, CreateTableAutoCreatesMissingDataset) {
242 1 : absl::Status s = RunDdl("CREATE TABLE fresh_ds.t (id INT64)");
243 2 : ASSERT_TRUE(s.ok()) << s;
244 1 : auto schema = storage_->GetSchema({"proj-test", "fresh_ds", "t"});
245 2 : ASSERT_TRUE(schema.ok()) << schema.status();
246 1 : }
247 :
248 1 : TEST_F(ControlOpExecutorTest, CreateTableOneSegmentUsesDefaultDataset) {
249 1 : CatalogBundle bundle = MakeCatalog();
250 1 : ::googlesql::AnalyzerOptions options = MakeAnalyzerOptions();
251 1 : ::googlesql::TypeFactory type_factory;
252 1 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
253 1 : const std::string sql = "CREATE TABLE typed (i INT64)";
254 1 : ASSERT_TRUE(::googlesql::AnalyzeStatement(
255 1 : sql, options, bundle.catalog.get(), &type_factory, &output)
256 1 : .ok());
257 1 : QueryRequest req = MakeRequest(sql);
258 1 : req.default_dataset_id = "_default";
259 1 : absl::Status s = executor_->ExecuteDdl(
260 1 : req, *output->resolved_statement(), bundle.catalog.get());
261 2 : ASSERT_TRUE(s.ok()) << s;
262 1 : auto schema = storage_->GetSchema({"proj-test", "_default", "typed"});
263 2 : ASSERT_TRUE(schema.ok()) << schema.status();
264 1 : }
265 :
266 : // Regression for the recidiviz-fork report: `CREATE OR REPLACE TABLE
267 : // `ds.t`` arrives from the analyzer as a single dotted name-path
268 : // segment ("ds.t"). Production BigQuery splits the backtick-quoted
269 : // path into dataset/table; the engine must too, instead of rejecting
270 : // it as a bogus one-segment name with "no defaultDataset".
271 1 : TEST_F(ControlOpExecutorTest, CreateTableBacktickDatasetQualifiedPath) {
272 1 : absl::Status s = RunDdl("CREATE OR REPLACE TABLE `ds.t` (id INT64)");
273 2 : ASSERT_TRUE(s.ok()) << s;
274 1 : auto schema = storage_->GetSchema({"proj-test", "ds", "t"});
275 2 : ASSERT_TRUE(schema.ok()) << schema.status();
276 1 : ASSERT_EQ(schema->columns.size(), 1u);
277 1 : EXPECT_EQ(schema->columns[0].name, "id");
278 1 : }
279 :
280 : // A fully backtick-quoted `project.dataset.table` path likewise lands
281 : // as one dotted segment; it must split into all three components and
282 : // override the default project.
283 1 : TEST_F(ControlOpExecutorTest, CreateTableBacktickFullyQualifiedPath) {
284 1 : absl::Status s =
285 1 : RunDdl("CREATE TABLE `proj-test.fresh_ds.t` (id INT64, name STRING)");
286 2 : ASSERT_TRUE(s.ok()) << s;
287 1 : auto schema = storage_->GetSchema({"proj-test", "fresh_ds", "t"});
288 2 : ASSERT_TRUE(schema.ok()) << schema.status();
289 1 : ASSERT_EQ(schema->columns.size(), 2u);
290 1 : }
291 :
292 1 : TEST_F(ControlOpExecutorTest, CreateOrReplaceTableReplacesExisting) {
293 1 : ASSERT_TRUE(RunDdl("CREATE TABLE ds.t (id INT64)").ok());
294 1 : absl::Status second =
295 1 : RunDdl("CREATE OR REPLACE TABLE ds.t (id INT64, label STRING)");
296 2 : ASSERT_TRUE(second.ok()) << second;
297 :
298 1 : auto schema = storage_->GetSchema({"proj-test", "ds", "t"});
299 2 : ASSERT_TRUE(schema.ok()) << schema.status();
300 1 : ASSERT_EQ(schema->columns.size(), 2u);
301 1 : EXPECT_EQ(schema->columns[1].name, "label");
302 1 : }
303 :
304 : // --- DROP TABLE ----------------------------------------------------------
305 :
306 1 : TEST_F(ControlOpExecutorTest, DropTableRemovesStorageTable) {
307 1 : CreatePeopleTable();
308 1 : absl::Status s = RunDdl("DROP TABLE ds.people");
309 2 : ASSERT_TRUE(s.ok()) << s;
310 1 : auto schema = storage_->GetSchema({"proj-test", "ds", "people"});
311 1 : ASSERT_FALSE(schema.ok());
312 2 : EXPECT_EQ(schema.status().code(), absl::StatusCode::kNotFound)
313 2 : << schema.status();
314 1 : }
315 :
316 1 : TEST_F(ControlOpExecutorTest, DropTableIfExistsSwallowsMissingTable) {
317 1 : absl::Status s = RunDdl("DROP TABLE IF EXISTS ds.absent");
318 2 : EXPECT_TRUE(s.ok()) << s;
319 1 : }
320 :
321 1 : TEST_F(ControlOpExecutorTest, DropTableMissingSurfacesNotFound) {
322 1 : absl::Status s = RunDdl("DROP TABLE ds.absent");
323 1 : ASSERT_FALSE(s.ok());
324 2 : EXPECT_EQ(s.code(), absl::StatusCode::kNotFound) << s;
325 1 : }
326 :
327 1 : TEST_F(ControlOpExecutorTest, DropViewMissingSurfacesNotFound) {
328 1 : absl::Status s = RunDdl("DROP VIEW ds.absent_view");
329 1 : ASSERT_FALSE(s.ok());
330 2 : EXPECT_EQ(s.code(), absl::StatusCode::kNotFound) << s;
331 1 : }
332 :
333 1 : TEST_F(ControlOpExecutorTest, DropViewIfExistsSwallowsMissingView) {
334 1 : absl::Status s = RunDdl("DROP VIEW IF EXISTS ds.absent");
335 2 : EXPECT_TRUE(s.ok()) << s;
336 1 : }
337 :
338 : // --- ANALYZE -------------------------------------------------------------
339 :
340 1 : TEST_F(ControlOpExecutorTest, AnalyzeKnownTableSurfacesUnimplemented) {
341 1 : CreatePeopleTable();
342 1 : absl::Status s = RunDdl("ANALYZE ds.people");
343 2 : EXPECT_EQ(s.code(), absl::StatusCode::kUnimplemented) << s;
344 2 : EXPECT_TRUE(absl::StrContains(s.message(), "AnalyzeStmt")) << s;
345 1 : }
346 :
347 1 : TEST_F(ControlOpExecutorTest, AnalyzeWithoutTablesSurfacesUnimplemented) {
348 1 : absl::Status s = RunDdl("ANALYZE");
349 2 : EXPECT_EQ(s.code(), absl::StatusCode::kUnimplemented) << s;
350 2 : EXPECT_TRUE(absl::StrContains(s.message(), "AnalyzeStmt")) << s;
351 1 : }
352 :
353 : } // namespace
354 : } // namespace control
355 : } // namespace engine
356 : } // namespace backend
357 : } // namespace bigquery_emulator
|