LCOV - code coverage report
Current view: top level - backend/storage/duckdb - duckdb_storage.h (source / functions) Coverage Total Hit
Test: _coverage_report.dat Lines: 0.0 % 3 0
Test Date: 2026-07-02 21:01:18 Functions: 0.0 % 1 0

            Line data    Source code
       1              : #ifndef BIGQUERY_EMULATOR_BACKEND_STORAGE_DUCKDB_DUCKDB_STORAGE_H_
       2              : #define BIGQUERY_EMULATOR_BACKEND_STORAGE_DUCKDB_DUCKDB_STORAGE_H_
       3              : 
       4              : // DuckDBStorage is the persistent, file-backed `Storage` implementation.
       5              : //
       6              : // Layout under `data_dir`:
       7              : //
       8              : //   <data_dir>/
       9              : //     catalog.duckdb                            # DuckDB catalog file
      10              : //     <project_id>/                             # one dir per project
      11              : //       <dataset_id>/                           # one dir per dataset
      12              : //         _dataset.meta.json                    # dataset-level metadata
      13              : //         <table_id>.parquet                    # data file
      14              : //         <table_id>.meta.json                  # per-table sidecar
      15              : //
      16              : // The DuckDB catalog file tracks dataset existence (as DuckDB schemas)
      17              : // and table existence (as DuckDB views over the matching parquet file).
      18              : // BigQuery-specific metadata that does not fit cleanly in DuckDB
      19              : // (description, labels, friendlyName, etag, and the BigQuery-typed
      20              : // schema) lives in the JSON sidecars so a developer can inspect, edit,
      21              : // or hand-author a dataset/table without going through the emulator.
      22              : //
      23              : // This header is the *core* skeleton: it owns the connection, the
      24              : // directory layout, the metadata sidecar, and dataset/table CRUD.
      25              : // The actual Parquet I/O for `AppendRows` / `ScanRows` lands in the
      26              : // follow-up plan `duckdb-storage-ddl_p1e2f3a4`; both methods return
      27              : // UNIMPLEMENTED until then.
      28              : //
      29              : // Concurrency: public methods serialize on a single absl::Mutex, except
      30              : // `data_dir()` (takes no lock) and the `*Locked` helpers (caller holds it).
      31              : // DuckDB is thread-safe per-connection, but we serialize at the C++ level so
      32              : // dataset / table directory mutations stay in lockstep with catalog rows.
      33              : 
      34              : #include <filesystem>
      35              : #include <optional>
      36              : #include <string>
      37              : #include <vector>
      38              : 
      39              : #include "absl/base/thread_annotations.h"
      40              : #include "absl/status/status.h"
      41              : #include "absl/status/statusor.h"
      42              : #include "absl/strings/string_view.h"
      43              : #include "absl/synchronization/mutex.h"
      44              : #include "absl/types/span.h"
      45              : #include "backend/schema/schema.h"
      46              : #include "backend/storage/storage.h"
      47              : 
      48              : namespace bigquery_emulator {
      49              : namespace backend {
      50              : namespace storage {
      51              : namespace duckdb {
      52              : 
      53              : class DuckDBStorage : public Storage {  // Persistent, file-backed Storage; see the file header.
      54              :  public:
      55              :   // Constructs a DuckDBStorage rooted at `data_dir`. The directory is
      56              :   // created (recursively) if it does not exist. Opens a DuckDB
      57              :   // connection backed by `<data_dir>/catalog.duckdb` so dataset /
      58              :   // table existence survives process restarts.
      59              :   //
      60              :   // Returns INVALID_ARGUMENT when `data_dir` is empty, FAILED_PRECONDITION
      61              :   // when the directory cannot be created (e.g. permission denied), or
      62              :   // INTERNAL when DuckDB itself refuses to open the catalog file. On
      63              :   // success the caller owns the returned unique_ptr; the connection
      64              :   // closes on destruction.
      65              :   static absl::StatusOr<std::unique_ptr<DuckDBStorage>> Open(
      66              :       absl::string_view data_dir);  // NOTE(review): std::unique_ptr needs <memory>, which this header does not include directly.
      67              : 
      68              :   ~DuckDBStorage() override;
      69              : 
      70              :   DuckDBStorage(const DuckDBStorage&) = delete;  // non-copyable
      71              :   DuckDBStorage& operator=(const DuckDBStorage&) = delete;
      72              : 
      73              :   // Path the storage was opened with. Stable for the lifetime of the
      74              :   // instance; exposed mainly for tests / logs. Reads `data_dir_` without taking `mu_`.
      75            0 :   absl::string_view data_dir() const override {
      76            0 :     return data_dir_;  // view into the member; valid only while *this lives
      77            0 :   }
      78              : 
      79              :   // ------------------------------------------------------------------
      80              :   // Storage interface (overrides of the `Storage` base class)
      81              :   // ------------------------------------------------------------------
      82              :   absl::Status CreateDataset(const DatasetId& id,
      83              :                              absl::string_view location) override;
      84              :   absl::Status DropDataset(const DatasetId& id,
      85              :                            bool delete_contents,
      86              :                            absl::string_view rest_metadata_json = {}) override;
      87              :   absl::Status RestoreDataset(const DatasetId& id,
      88              :                               std::int64_t deleted_ms = 0) override;  // NOTE(review): std::int64_t needs <cstdint>, not included directly.
      89              :   absl::StatusOr<std::vector<DatasetId>> ListDatasets(
      90              :       absl::string_view project_id) const override;
      91              : 
      92              :   absl::Status CreateTable(const TableId& id,
      93              :                            const schema::TableSchema& schema) override;
      94              :   absl::Status DropTable(const TableId& id) override;
      95              :   absl::Status RestoreTable(const TableId& id,
      96              :                             std::int64_t deleted_ms = 0) override;
      97              :   absl::StatusOr<std::vector<TableId>> ListTables(
      98              :       const DatasetId& dataset_id) const override;
      99              : 
     100              :   absl::StatusOr<schema::TableSchema> GetSchema(
     101              :       const TableId& id) const override;
     102              : 
     103              :   // Per the file header, the core skeleton returns UNIMPLEMENTED for
     104              :   // `AppendRows` / `ScanRows`. The DDL plan (`duckdb-storage-ddl_p1e2f3a4`)
     105              :   // lowers them onto Parquet I/O via DuckDB's `read_parquet` + INSERT statements.
     106              :   absl::Status AppendRows(const TableId& id,
     107              :                           absl::Span<const Row> rows) override;
     108              :   absl::Status OverwriteRows(const TableId& id,
     109              :                              absl::Span<const Row> rows) override;
     110              :   absl::StatusOr<std::unique_ptr<RowIterator>> ScanRows(
     111              :       const TableId& id) const override;
     112              :   absl::StatusOr<std::unique_ptr<RowIterator>> CreateReadStream(
     113              :       const TableId& id, const ReadFilter& filter) const override;
     114              :   absl::StatusOr<std::int64_t> CountRows(const TableId& id) const override;
     115              : 
     116              :   std::optional<std::string> ParquetSnapshotPath(
     117              :       const TableId& id) const override;
     118              :   absl::StatusOr<std::optional<std::string>> ParquetSnapshotPathAt(
     119              :       const TableId& id, std::int64_t as_of_ms) const override;
     120              : 
     121              :   absl::Status UpsertRoutine(const RoutineRecord& record) override;
     122              :   absl::Status DeleteRoutine(const RoutineId& id) override;
     123              :   absl::StatusOr<RoutineRecord> GetRoutine(const RoutineId& id) const override;
     124              :   absl::StatusOr<std::vector<RoutineRecord>> ListRoutines(
     125              :       const DatasetId& dataset_id) const override;
     126              :   absl::StatusOr<std::vector<RoutineRecord>> ListAllRoutines() const override;
     127              : 
     128              :   absl::Status UpsertView(const ViewRecord& record) override;
     129              :   absl::Status DeleteView(const ViewId& id) override;
     130              :   absl::StatusOr<std::vector<ViewRecord>> ListAllViews() const override;
     131              :   absl::StatusOr<TableResourceInfo> GetTableResourceInfo(
     132              :       const TableId& id) const override;
     133              : 
     134              :   absl::StatusOr<TableGovernance> GetTableGovernance(
     135              :       const TableId& id) const override;
     136              :   absl::Status UpsertRowAccessPolicy(
     137              :       const TableId& id, const RowAccessPolicyRecord& policy) override;
     138              :   absl::Status DeleteRowAccessPolicy(const TableId& id,
     139              :                                      absl::string_view policy_id) override;
     140              :   absl::Status SetColumnGovernance(
     141              :       const TableId& id,
     142              :       absl::string_view column_name,
     143              :       const ColumnGovernanceRecord& column) override;
     144              : 
     145              :   // Ensures catalog metadata tables (e.g. `__bqemu_routines`) exist.
     146              :   // Called from `Open` and idempotently before routine CRUD.
     147              :   absl::Status InitCatalogTables();
     148              : 
     149              :   absl::StatusOr<std::string> GetDatasetRestMetadataJson(
     150              :       const DatasetId& id) const;  // presumably locks `mu_`, then defers to the *Locked variant below — confirm
     151              : 
     152              :   // Dataset tombstone helpers (caller must hold mu_). NOTE(review): public, and no ABSL_*_LOCKS_REQUIRED annotation enforces this.
     153              :   absl::Status SnapshotDatasetRegistryForTombstoneLocked(
     154              :       const DatasetId& id, const std::filesystem::path& tombstone_dir);
     155              :   absl::Status RestoreDatasetRegistryFromTombstoneLocked(
     156              :       const DatasetId& id, const std::filesystem::path& tombstone_dir);
     157              :   absl::Status PurgeDatasetRegistryRowsLocked(const DatasetId& id);
     158              :   absl::StatusOr<std::string> GetDatasetRestMetadataJsonLocked(
     159              :       const DatasetId& id) const;
     160              : 
     161              :   // Pimpl: keeps the DuckDB C handles out of this header so the
     162              :   // engine-agnostic Storage signatures stay enforceable from the
     163              :   // include graph alone (callers cannot accidentally reach into
     164              :   // `duckdb_database` / `duckdb_connection`). Public so the
     165              :   // translation unit's helper functions can take an `Impl*` directly
     166              :   // — the struct itself is only ever defined inside duckdb_storage.cc.
     167              :   struct Impl;
     168              : 
     169              :  private:
     170              :   DuckDBStorage(std::string data_dir, std::unique_ptr<Impl> impl);  // private: instances are obtained via `Open`
     171              : 
     172              :   // Filesystem layout helpers. Ids arrive as string_views or as DatasetId /
     173              :   // TableId values; all emit absolute paths under `data_dir_`.
     174              :   std::string DatasetDir(absl::string_view project_id,
     175              :                          absl::string_view dataset_id) const;
     176              :   std::string DatasetDir(const DatasetId& id) const;
     177              :   std::string DatasetMetaPath(const DatasetId& id) const;
     178              :   std::string TableMetaPath(const TableId& id) const;
     179              :   std::string TableParquetPath(const TableId& id) const;
     180              : 
     181              :   std::string TableGovernancePath(const TableId& id) const;
     182              : 
     183              :   absl::Status PutTableGovernance(const TableId& id,
     184              :                                   const TableGovernance& gov);
     185              : 
     186              :   // Stable DuckDB schema name for a (project, dataset) pair. We can't
     187              :   // just use the dataset_id because two projects may share a dataset
     188              :   // id; collapse them into one safe identifier so DuckDB stays happy.
     189              :   static std::string DuckDBSchemaName(absl::string_view project_id,
     190              :                                       absl::string_view dataset_id);
     191              :   static std::string DuckDBSchemaName(const DatasetId& id);
     192              : 
     193              :   std::string data_dir_;  // backing storage for the view returned by data_dir()
     194              :   mutable absl::Mutex mu_;  // mutable so const methods can lock it
     195              :   // The pointer is set once at construction and never reassigned; the
     196              :   // *contents* of the connection are guarded by `mu_` because DuckDB
     197              :   // is thread-safe per-connection but the dataset/table directory
     198              :   // mutations need to stay coherent with the DuckDB catalog rows we
     199              :   // emit alongside them.
     200              :   std::unique_ptr<Impl> impl_{};
     201              : };
     202              : 
     203              : }  // namespace duckdb
     204              : }  // namespace storage
     205              : }  // namespace backend
     206              : }  // namespace bigquery_emulator
     207              : 
     208              : #endif  // BIGQUERY_EMULATOR_BACKEND_STORAGE_DUCKDB_DUCKDB_STORAGE_H_
        

Generated by: LCOV version 2.0-1