Line data Source code
1 : #ifndef BIGQUERY_EMULATOR_BACKEND_ENGINE_SEMANTIC_ROW_SOURCE_H_
2 : #define BIGQUERY_EMULATOR_BACKEND_ENGINE_SEMANTIC_ROW_SOURCE_H_
3 :
4 : // Compositional `RowSource` adapters used by the semantic executor.
5 : //
6 : // Design constraint #2 from
7 : // `docs/ENGINE_POLICY.md` is that the
8 : // semantic executor REUSES `DuckDbExecutor` for shapes the
9 : // fast-path covers (table scans, joins, aggregations); it never
10 : // opens a new DuckDB connection of its own. The
11 : // `MaterializedRowSource` here is the composition primitive
12 : // downstream plans use to:
13 : //
14 : // * stream rows produced by a `DuckDbExecutor`-backed `RowSource`
15 : // through a semantic-executor projection (e.g. a SELECT whose
16 : // FROM is a fast-path scan but whose projection list contains
17 : // a SAFE_DIVIDE that must surface BigQuery-exact errors);
18 : // * inject synthetic Arrow batches into unit / integration tests
19 : // so the streaming contract can be exercised without spinning
20 : // up a real DuckDB query.
21 : //
22 : // For the basic scalar-only SELECT path shipped in this plan, the
23 : // executor never reaches for the adapter -- the value table /
// FROM-clause shapes that do are owned by the follow-up plans
// described in `docs/ENGINE_POLICY.md`. The adapter ships here so
// those plans have a stable interface to consume.
28 :
29 : #include <memory>
30 : #include <utility>
31 : #include <vector>
32 :
33 : #include "absl/status/status.h"
34 : #include "absl/status/statusor.h"
35 : #include "backend/engine/engine.h"
36 : #include "backend/schema/schema.h"
37 : #include "backend/storage/storage.h"
38 :
39 : namespace bigquery_emulator {
40 : namespace backend {
41 : namespace engine {
42 : namespace semantic {
43 :
44 : // `MaterializedRowSource` is the simplest `RowSource` adapter: it
45 : // owns a pre-built vector of `storage::Row` plus a `TableSchema`
46 : // and streams them one-at-a-time on `Next`. Downstream plans use
47 : // it to wrap a drained `DuckDbExecutor` result, but it is also the
48 : // primitive the semantic executor's scalar-only SELECT path uses
49 : // internally (single-row case).
50 : class MaterializedRowSource : public RowSource {
51 : public:
52 : MaterializedRowSource(schema::TableSchema schema,
53 : std::vector<storage::Row> rows)
54 4 : : schema_(std::move(schema)), rows_(std::move(rows)) {}
55 :
56 3 : const schema::TableSchema& schema() const override {
57 3 : return schema_;
58 3 : }
59 :
60 13 : absl::StatusOr<bool> Next(storage::Row* row) override {
61 13 : if (row == nullptr) {
62 1 : return absl::InvalidArgumentError(
63 1 : "MaterializedRowSource::Next called with null row");
64 1 : }
65 12 : if (cursor_ >= rows_.size()) return false;
66 8 : *row = rows_[cursor_++];
67 8 : return true;
68 12 : }
69 :
70 : private:
71 : schema::TableSchema schema_;
72 : std::vector<storage::Row> rows_;
73 : size_t cursor_ = 0;
74 : };
75 :
76 : // `DrainRowSource` reads every row out of `source` (a producer the
77 : // caller owns) into a `MaterializedRowSource`. Used by downstream
78 : // plans to wrap a `DuckDbExecutor`-backed result and stream it
79 : // through the semantic executor.
80 : //
81 : // Drains eagerly so the upstream's lifetime is decoupled from the
82 : // returned adapter's; the upstream is closed when the caller drops
83 : // it. The single materialization pass is acceptable today because
84 : // the fast-path shapes the semantic executor composes with cap out
85 : // at a few thousand rows; streaming-style draining is in scope for
86 : // the row-source plan that follows
87 : // `docs/ENGINE_POLICY.md`.
88 : absl::StatusOr<std::unique_ptr<RowSource>> DrainRowSource(RowSource& source);
89 :
90 : } // namespace semantic
91 : } // namespace engine
92 : } // namespace backend
93 : } // namespace bigquery_emulator
94 :
95 : #endif // BIGQUERY_EMULATOR_BACKEND_ENGINE_SEMANTIC_ROW_SOURCE_H_
|