Line data Source code
1 : // Crash-safety regression tests for client-reachable catalog paths.
2 : //
3 : // R4: Engine abort on duplicate catalog name during view replay (authorize-view
4 : // repeat). A duplicate bare view name across datasets used to abort the engine
5 : // via SimpleCatalog::AddTable during eager view replay. Views now resolve
6 : // lazily through FindProjectView; these tests assert catalog construction and
7 : // registration never abort on adversarial duplicate names or replay cycles.
8 : //
9 : // Plan:
10 : // .cursor/plans/conformance-hardening/07-reported-bug-regression-fixtures.plan.md
11 : // See also: .cursor/plans/conformance-hardening/03-engine-crash-safety.plan.md
12 :
13 : #include <cstdlib>
14 : #include <filesystem>
15 : #include <memory>
16 : #include <random>
17 : #include <string>
18 : #include <system_error>
19 : #include <utility>
20 :
21 : #include "absl/status/status.h"
22 : #include "absl/strings/str_cat.h"
23 : #include "backend/catalog/googlesql_catalog.h"
24 : #include "backend/catalog/udf_registration_catalog.h"
25 : #include "backend/catalog/udf_registry.h"
26 : #include "backend/catalog/view_registry.h"
27 : #include "backend/schema/schema.h"
28 : #include "backend/storage/duckdb/duckdb_storage.h"
29 : #include "googlesql/public/analyzer.h"
30 : #include "googlesql/public/analyzer_options.h"
31 : #include "googlesql/public/analyzer_output.h"
32 : #include "googlesql/public/function.h"
33 : #include "googlesql/public/function_signature.h"
34 : #include "googlesql/public/language_options.h"
35 : #include "googlesql/public/options.pb.h"
36 : #include "googlesql/public/simple_catalog.h"
37 : #include "googlesql/public/types/type_factory.h"
38 : #include "googlesql/resolved_ast/resolved_ast.h"
39 : #include "googlesql/resolved_ast/resolved_node_kind.pb.h"
40 : #include "gtest/gtest.h"
41 :
42 : namespace bigquery_emulator {
43 : namespace backend {
44 : namespace catalog {
45 : namespace {
46 :
47 : namespace fs = std::filesystem;  // Alias used for the temp-directory path handling in the fixture.
48 :
49 : const char* kProject = "proj_catalog_crash_safety";  // Project id passed to the storage, catalog, and registry calls in this file.
50 :
51 31 : ::googlesql::LanguageOptions MakeLanguageOptions() {  // Returns the LanguageOptions used by MakeCatalog() and MakeAnalyzerOptions().
52 31 : ::googlesql::LanguageOptions language;
53 31 : language.EnableMaximumLanguageFeatures();
54 31 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
55 31 : language.set_name_resolution_mode(::googlesql::NAME_RESOLUTION_DEFAULT);  // Together: maximum features, external product mode, default name resolution.
56 31 : return language;
57 31 : }
58 :
59 9 : ::googlesql::AnalyzerOptions MakeAnalyzerOptions() {  // Returns AnalyzerOptions layered on MakeLanguageOptions() for AnalyzeStatement calls.
60 9 : ::googlesql::AnalyzerOptions options(MakeLanguageOptions());
61 9 : options.set_error_message_mode(::googlesql::ERROR_MESSAGE_ONE_LINE);
62 9 : options.CreateDefaultArenasIfNotSet();
63 9 : options.mutable_language()->SetSupportsAllStatementKinds();  // Presumably required so CREATE VIEW DDL is accepted by the analyzer -- TODO confirm.
64 9 : return options;
65 9 : }
66 :
67 : std::unique_ptr<const ::googlesql::Function> MakeScalarFn(  // Builds a SCALAR Function called `name` in group "External_function" with a single signature.
68 1 : const std::string& name) {
69 1 : ::googlesql::FunctionSignature signature(
70 1 : ::googlesql::FunctionArgumentType(::googlesql::types::Int64Type()),  // INT64; presumably the result type (first constructor argument) -- TODO confirm.
71 1 : /*arguments=*/{},
72 1 : /*context_id=*/static_cast<int64_t>(0));
73 1 : return std::make_unique<::googlesql::Function>(
74 1 : std::vector<std::string>{name},  // Name is passed as a one-element path.
75 1 : /*group=*/"External_function",
76 1 : ::googlesql::Function::SCALAR,
77 1 : std::vector<::googlesql::FunctionSignature>{signature});
78 1 : }
79 :
80 : class CatalogCrashSafetyTest : public ::testing::Test {  // Fixture: one DuckDBStorage in a per-test temp directory, plus catalog/view-registration helpers.
81 : protected:
82 4 : void SetUp() override {  // Opens storage in a fresh directory and creates dataset ds_base with table `source`.
83 4 : const char* tmpdir_env = std::getenv("TMPDIR");
84 4 : const std::string tmpdir = tmpdir_env != nullptr ? tmpdir_env : "/tmp";  // Fall back to /tmp when TMPDIR is unset.
85 4 : std::random_device rd;
86 4 : std::seed_seq seed{rd(), rd()};
87 4 : std::mt19937_64 rng(seed);
88 4 : data_dir_ =
89 4 : fs::path(tmpdir) / absl::StrCat("bqemu-catalog-crash-safety-", rng());  // Random numeric suffix gives each test its own directory name.
90 4 : std::error_code ec;
91 4 : fs::remove_all(data_dir_, ec);  // Clear any leftover directory of the same name; the error code is ignored.
92 4 : auto opened = storage::duckdb::DuckDBStorage::Open(data_dir_.string());
93 8 : ASSERT_TRUE(opened.ok()) << opened.status();
94 4 : storage_ = std::move(opened).value();
95 4 : ASSERT_TRUE(storage_->CreateDataset({kProject, "ds_base"}, "US").ok());
96 4 : schema::TableSchema schema;
97 4 : schema.columns.push_back({.name = "id",
98 4 : .type = schema::ColumnType::kInt64,
99 4 : .mode = schema::ColumnMode::kRequired});  // Single required INT64 column `id`.
100 4 : ASSERT_TRUE(
101 4 : storage_->CreateTable({kProject, "ds_base", "source"}, schema).ok());  // ds_base.source is the table every test view selects from.
102 4 : }
103 :
104 4 : void TearDown() override {  // Drops the storage object first, then deletes its directory.
105 4 : storage_.reset();
106 4 : std::error_code ec;
107 4 : fs::remove_all(data_dir_, ec);  // Best-effort cleanup; the error code is ignored.
108 4 : }
109 :
110 : struct CatalogBundle {  // A catalog together with the TypeFactory it was constructed with.
111 : std::unique_ptr<::googlesql::TypeFactory> type_factory{};  // Declared before `catalog`, so it is destroyed after it.
112 : std::unique_ptr<GoogleSqlCatalog> catalog{};
113 : };
114 :
115 22 : CatalogBundle MakeCatalog(absl::string_view default_dataset = "") {  // Builds a new GoogleSqlCatalog for kProject over storage_ with its own TypeFactory.
116 22 : auto type_factory = std::make_unique<::googlesql::TypeFactory>();
117 22 : auto catalog = std::make_unique<GoogleSqlCatalog>(kProject,
118 22 : storage_.get(),
119 22 : type_factory.get(),
120 22 : MakeLanguageOptions(),
121 22 : default_dataset);
122 22 : return {std::move(type_factory), std::move(catalog)};
123 22 : }
124 :
125 9 : absl::Status RegisterViewFromSql(absl::string_view sql) {  // Analyzes `sql` against a fresh catalog and registers the resulting CREATE VIEW; returns the first failing status.
126 9 : CatalogBundle bundle = MakeCatalog();
127 9 : ::googlesql::TypeFactory analyze_tf;
128 9 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
129 9 : absl::Status analyzed = ::googlesql::AnalyzeStatement(
130 9 : sql, MakeAnalyzerOptions(), bundle.catalog.get(), &analyze_tf, &output);
131 9 : if (!analyzed.ok()) return analyzed;  // Analyzer errors are returned to the caller unchanged.
132 9 : const ::googlesql::ResolvedStatement* stmt = output->resolved_statement();
133 9 : if (stmt == nullptr) {
134 0 : return absl::InternalError("analyzer returned null statement");
135 0 : }
136 9 : if (stmt->node_kind() != ::googlesql::RESOLVED_CREATE_VIEW_STMT) {  // Only CREATE VIEW statements are accepted by this helper.
137 0 : return absl::InvalidArgumentError("expected CREATE VIEW statement");
138 0 : }
139 9 : const auto* create_view =
140 9 : stmt->GetAs<::googlesql::ResolvedCreateViewStmt>();
141 9 : if (create_view == nullptr) {
142 0 : return absl::InternalError("CREATE VIEW has null resolved stmt");
143 0 : }
144 9 : ::googlesql::TypeFactory* reg_tf = EnsureProjectTypeFactory(kProject);  // Registration uses the project-level factory, not analyze_tf.
145 9 : return RegisterProjectView(kProject,
146 9 : /*default_dataset_id=*/"",
147 9 : *create_view,
148 9 : std::move(output),  // create_view was obtained from `output`, which is moved into this call.
149 9 : reg_tf);
150 9 : }
151 :
152 : fs::path data_dir_{};  // Directory backing storage_; assigned in SetUp, removed in TearDown.
153 : std::unique_ptr<storage::duckdb::DuckDBStorage> storage_{};  // Storage opened in SetUp and reset in TearDown.
154 : };
155 :
156 : TEST_F(CatalogCrashSafetyTest,  // Views named `profiles` in two datasets must both resolve from freshly built catalogs.
157 1 : DuplicateViewNameAcrossDatasetsDoesNotAbortCatalogConstruction) {
158 1 : ASSERT_TRUE(storage_->CreateDataset({kProject, "ds_a"}, "US").ok());
159 1 : ASSERT_TRUE(storage_->CreateDataset({kProject, "ds_b"}, "US").ok());
160 :
161 1 : ASSERT_TRUE(RegisterViewFromSql(
162 1 : "CREATE VIEW ds_a.profiles AS SELECT id FROM ds_base.source")
163 1 : .ok());
164 1 : ASSERT_TRUE(RegisterViewFromSql(
165 1 : "CREATE VIEW ds_b.profiles AS SELECT id FROM ds_base.source")
166 1 : .ok());  // Same bare view name, different dataset.
167 :
168 6 : for (int i = 0; i < 5; ++i) {  // Build the catalog five times; each construction and lookup must succeed.
169 5 : CatalogBundle bundle = MakeCatalog();
170 5 : const ::googlesql::Table* table_a = nullptr;
171 5 : const ::googlesql::Table* table_b = nullptr;
172 5 : EXPECT_TRUE(bundle.catalog->FindTable({"ds_a", "profiles"}, &table_a).ok());
173 5 : EXPECT_TRUE(bundle.catalog->FindTable({"ds_b", "profiles"}, &table_b).ok());
174 5 : ASSERT_NE(table_a, nullptr);
175 5 : ASSERT_NE(table_b, nullptr);
176 5 : EXPECT_NE(table_a, table_b);  // The two lookups must not return the same Table object.
177 5 : }
178 1 : }
179 :
180 1 : TEST_F(CatalogCrashSafetyTest, ReRegisterViewReplacesWithoutAbort) {  // Registering ds_tenant.v twice (CREATE, then CREATE OR REPLACE) must succeed and stay resolvable.
181 1 : ASSERT_TRUE(storage_->CreateDataset({kProject, "ds_tenant"}, "US").ok());
182 1 : const std::string ddl =
183 1 : "CREATE VIEW ds_tenant.v AS SELECT id FROM ds_base.source";
184 1 : ASSERT_TRUE(RegisterViewFromSql(ddl).ok());  // First registration.
185 1 : ASSERT_TRUE(
186 1 : RegisterViewFromSql(
187 1 : "CREATE OR REPLACE VIEW ds_tenant.v AS SELECT id FROM ds_base.source")
188 1 : .ok());  // Second registration under the same name.
189 :
190 4 : for (int i = 0; i < 3; ++i) {  // Three fresh catalogs must each find the view.
191 3 : CatalogBundle bundle = MakeCatalog();
192 3 : const ::googlesql::Table* view = nullptr;
193 3 : EXPECT_TRUE(bundle.catalog->FindTable({"ds_tenant", "v"}, &view).ok());
194 3 : ASSERT_NE(view, nullptr);
195 3 : }
196 1 : }
197 :
198 1 : TEST_F(CatalogCrashSafetyTest, ReplayFunctionsTwiceOnSameCatalogDoesNotAbort) {  // Replaying registered functions into one SimpleCatalog twice must leave the function retrievable.
199 1 : const std::string fn_name = "crash_safety_ds.fn";
200 1 : ::googlesql::TypeFactory type_factory;
201 1 : ::googlesql::SimpleCatalog catalog(kProject, &type_factory);
202 :
203 1 : ASSERT_TRUE(RegisterProjectFunction(kProject,
204 1 : /*dataset_id=*/"",
205 1 : /*is_temp=*/false,
206 1 : /*analyzer_output=*/nullptr,
207 1 : MakeScalarFn(fn_name))
208 1 : .ok());
209 1 : ReplayFunctionsIntoCatalog(kProject, catalog);
210 1 : ReplayFunctionsIntoCatalog(kProject, catalog);  // Second replay into the same catalog is the case under test.
211 :
212 1 : const ::googlesql::Function* fn = nullptr;
213 1 : ASSERT_TRUE(catalog.GetFunction(fn_name, &fn).ok());
214 1 : ASSERT_NE(fn, nullptr);
215 1 : }
216 :
217 : TEST_F(CatalogCrashSafetyTest,  // Five rounds of: re-register ds_tenant.v, fetch the registration catalog, resolve the view from a fresh query catalog.
218 1 : RegistrationCatalogSurvivesRepeatedViewAuthorizeCycles) {
219 1 : ASSERT_TRUE(storage_->CreateDataset({kProject, "ds_main"}, "US").ok());
220 1 : ASSERT_TRUE(storage_->CreateDataset({kProject, "ds_tenant"}, "US").ok());
221 :
222 1 : ::googlesql::TypeFactory reg_tf;  // NOTE(review): stack-local; if GetOrCreateRegistrationCatalog caches a catalog holding this pointer past the test, it would dangle -- confirm.
223 1 : const ::googlesql::LanguageOptions language = MakeCatalogLanguageOptions();
224 :
225 6 : for (int i = 0; i < 5; ++i) {
226 5 : ASSERT_TRUE(RegisterViewFromSql(
227 5 : "CREATE OR REPLACE VIEW ds_tenant.v AS SELECT id FROM "
228 5 : "ds_base.source")
229 5 : .ok());
230 5 : GoogleSqlCatalog* reg_catalog = nullptr;
231 5 : reg_catalog = GetOrCreateRegistrationCatalog(
232 5 : kProject, storage_.get(), &reg_tf, language, "ds_tenant");  // Pass the address of the local TypeFactory declared above.
233 5 : ASSERT_NE(reg_catalog, nullptr);
234 :
235 5 : CatalogBundle query_catalog = MakeCatalog("ds_tenant");  // Separate query-side catalog with ds_tenant as the default dataset.
236 5 : const ::googlesql::Table* view = nullptr;
237 5 : EXPECT_TRUE(
238 5 : query_catalog.catalog->FindTable({"ds_tenant", "v"}, &view).ok());
239 5 : ASSERT_NE(view, nullptr);
240 5 : }
241 1 : }
242 :
243 : } // namespace
244 : } // namespace catalog
245 : } // namespace backend
246 : } // namespace bigquery_emulator
|