Line data Source code
1 : #ifndef BIGQUERY_EMULATOR_BACKEND_CATALOG_STORAGE_TABLE_H_
2 : #define BIGQUERY_EMULATOR_BACKEND_CATALOG_STORAGE_TABLE_H_
3 :
4 : // StorageTable is the `googlesql::Table` adapter that exposes a row
5 : // stream out of the active `backend::storage::Storage` to any
6 : // GoogleSQL consumer that pulls rows through
7 : // `EvaluatorTableIterator`. It is the execution-side counterpart of
8 : // the analysis-only `SimpleTable` instances `GoogleSqlCatalog`
9 : // produces during analyzer name resolution: same column shape, same
10 : // `googlesql::Type*` allocations, but with a working
11 : // `CreateEvaluatorTableIterator` override that wraps
12 : // `Storage::ScanRows` and converts each storage `Value` into a
13 : // `googlesql::Value` of the matching column type as the consumer
14 : // pulls rows.
15 : //
16 : // The DuckDB engine does not drive this iterator (it ATTACHes the
17 : // underlying DuckDB storage and lets DuckDB scan rows directly), so
18 : // the override is currently exercised only by callers that walk a
19 : // `googlesql::Table` interface. We keep it because `SimpleTable`'s
20 : // default returns "not supported" and the abstract surface keeps
21 : // the door open for future analyzer-side callers (e.g. constant-
22 : // folding, dryRun planning) that may want a row stream.
23 : //
24 : // We subclass `googlesql::SimpleTable` so column-list management
25 : // (NumColumns / GetColumn / FindColumnByName / set_full_name / ...)
26 : // stays in one place. The override is just the iterator factory; nothing
27 : // else changes about how the analyzer or the algebrizer sees the
28 : // table.
29 : //
30 : // `StorageTable` itself does NOT own the `Storage` it reads from --
31 : // the storage instance lives at engine scope (constructed once at
32 : // startup, see `binaries/emulator_main/main.cc`). The catalog +
33 : // `StorageTable` lifetime is per query; the engine creates the
34 : // catalog when a `Query.ExecuteQuery` RPC arrives and destroys it
35 : // once the row stream completes.
36 :
37 : #include <memory>
38 : #include <string>
39 : #include <vector>
40 :
41 : #include "absl/status/status.h"
42 : #include "absl/status/statusor.h"
43 : #include "absl/strings/string_view.h"
44 : #include "absl/types/span.h"
45 : #include "backend/schema/schema.h"
46 : #include "backend/storage/storage.h"
47 : #include "googlesql/public/evaluator_table_iterator.h"
48 : #include "googlesql/public/simple_catalog.h"
49 : #include "googlesql/public/type.h"
50 : #include "googlesql/public/value.h"
51 :
52 : namespace bigquery_emulator {
53 : namespace backend {
54 : namespace catalog {
55 :
56 : // `googlesql::Table` adapter whose row stream comes from
57 : // `Storage::ScanRows(table_id)`. The column shape is fixed at
58 : // construction time (matching `bq_schema`); the iterator projects out
59 : // only the columns the evaluator requests. Non-copyable.
60 : class StorageTable : public ::googlesql::SimpleTable {
61 : public:
62 : // Builds the adapter from an engine-agnostic BigQuery schema.
63 : // `columns` must be aligned 1:1 with `bq_schema.columns` (same
64 : // length, same order); each entry pairs a column's name with its
65 : // analyzer-allocated `googlesql::Type*`. `storage` is borrowed, not
66 : // owned. The catalog adapter (`GoogleSqlCatalog::MaterializeTable`)
67 : // is the only caller that already has both pieces in hand, so the
68 : // constructor takes them paired rather than re-running translation.
69 : StorageTable(absl::string_view name,
70 : absl::string_view full_name,
71 : absl::Span<const NameAndType> columns,
72 : schema::TableSchema bq_schema,
73 : storage::TableId table_id,
74 : const storage::Storage* storage);
75 :
76 : ~StorageTable() override = default;
77 :
78 : StorageTable(const StorageTable&) = delete;
79 : StorageTable& operator=(const StorageTable&) = delete;
80 :
81 : // Streams the rows of the underlying `Storage` table, projected
82 : // down to `column_idxs`. The returned iterator captures a snapshot
83 : // taken at call time (semantics inherited from `Storage::ScanRows`)
84 : // and converts each storage `Value` into a `googlesql::Value` of
85 : // the matching column type as the consumer pulls rows.
86 : absl::StatusOr<std::unique_ptr<::googlesql::EvaluatorTableIterator>>
87 : CreateEvaluatorTableIterator(
88 : absl::Span<const int> column_idxs) const override;
89 :
90 : // Identifier of the backing storage table this analyzer-allocated
91 : // `Table` adapter is materialized from. Used by the DuckDB engine to
92 : // map a `ResolvedTableScan::table()` pointer back to the
93 : // `Storage::ScanRows(id)` call site so the engine can ATTACH the
94 : // rows into its DuckDB connection before executing the transpiled
95 : // SQL. The catalog hands out the same `Table*` for the lifetime of
96 : // the catalog instance, so the returned reference stays valid for
97 : // the duration of the query.
98 0 : const storage::TableId& storage_table_id() const {
99 0 : return table_id_;
100 0 : }
101 :
102 : // Engine-agnostic schema the storage layer keeps for this table.
103 : // Companion accessor to `storage_table_id()`: the DuckDB engine
104 : // consults it to emit the matching `CREATE TABLE` DDL when it
105 : // loads the rows into its in-memory DuckDB connection.
106 0 : const schema::TableSchema& bq_schema() const {
107 0 : return bq_schema_;
108 0 : }
109 :
110 : private:
111 : // The BigQuery-level schema mirrors `SimpleTable::columns_` 1:1 but
112 : // carries the engine-agnostic `ColumnType` discriminator the cell
113 : // converter needs to interpret storage `Value`s. We keep a private
114 : // copy because the catalog is re-materialized per query and we
115 : // don't want lookups during iteration to touch the storage
116 : // mutex.
117 : const schema::TableSchema bq_schema_;
118 : const storage::TableId table_id_;
119 : const storage::Storage* const storage_; // not owned; lives at engine scope
120 : };
121 :
122 : // Converts an engine-agnostic storage `Value` into a
123 : // `googlesql::Value` of `type`. Exposed for unit tests; the iterator
124 : // implementation is the only production caller. NULLs round-trip as
125 : // `googlesql::Value::Null(type)` so the evaluator's NULL bitmap stays
126 : // honest.
127 : //
128 : // Returns `INVALID_ARGUMENT` when the storage Value's kind cannot be
129 : // reconciled with `type` -- e.g. a kInt64 cell paired with a STRING
130 : // column. Storage validates its own shape against the table schema
131 : // on `AppendRows`, so this error only fires when the schema and the
132 : // stored rows have diverged on disk.
133 : absl::StatusOr<::googlesql::Value> StorageValueToGoogleSqlValue(
134 : const storage::Value& value, const ::googlesql::Type* type);
135 :
136 : // Overload that additionally uses `column` for the nested ARRAY / STRUCT
137 : // field layout when converting storage cells (required for deep-STRUCT DML).
138 : absl::StatusOr<::googlesql::Value> StorageValueToGoogleSqlValue(
139 : const storage::Value& value,
140 : const ::googlesql::Type* type,
141 : const schema::ColumnSchema& column);
142 :
143 : } // namespace catalog
144 : } // namespace backend
145 : } // namespace bigquery_emulator
146 :
147 : #endif // BIGQUERY_EMULATOR_BACKEND_CATALOG_STORAGE_TABLE_H_
|