LCOV - code coverage report
Current view: top level - backend/engine - engine.h (source / functions) Coverage Total Hit
Test: _coverage_report.dat Lines: 0.0 % 12 0
Test Date: 2026-07-02 21:01:18 Functions: 0.0 % 2 0

            Line data    Source code
       1              : #ifndef BIGQUERY_EMULATOR_BACKEND_ENGINE_ENGINE_H_
       2              : #define BIGQUERY_EMULATOR_BACKEND_ENGINE_ENGINE_H_
       3              : 
       4              : // Engine is the C++ engine's query execution interface.
       5              : //
       6              : // The only implementation lives at `backend/engine/duckdb/`: it
       7              : // transpiles the GoogleSQL ResolvedAST into DuckDB SQL via a custom
       8              : // visitor and executes it through DuckDB's C++ client.
       9              : //
      10              : // This header defines the abstract surface only. The
      11              : // `googlesql::Catalog` parameter is forward-declared so this header
      12              : // stays free of any GoogleSQL include dependency. The `AnalyzedQuery`
      13              : // and `RowSource` opaque interfaces let us return a resolved AST
      14              : // handle and a streamed result without leaking engine-specific types
      15              : // up to the gRPC handlers.
      16              : 
      17              : #include <cstdint>
      18              : #include <memory>
      19              : #include <string>
      20              : #include <utility>
      21              : #include <vector>
      22              : 
      23              : #include "absl/status/status.h"
      24              : #include "absl/status/statusor.h"
      25              : #include "absl/strings/string_view.h"
      26              : #include "backend/engine/phase_recorder.h"
      27              : #include "backend/schema/schema.h"
      28              : #include "backend/storage/storage.h"
      29              : 
      30              : // Forward-declared so this header does not pull in any GoogleSQL
      31              : // headers. The DuckDB engine downcasts the `googlesql::Catalog*` to
      32              : // its own catalog adapter when it actually runs analysis.
      33              : namespace googlesql {
      34              : class Catalog;
      35              : }  // namespace googlesql
      36              : 
      37              : namespace bigquery_emulator {
      38              : namespace backend {
      39              : namespace engine {
      40              : 
      41              : // One BigQuery query parameter (named or positional). `value_json`
      42              : // carries the JSON-encoded literal value the gateway received on the
      43              : // REST request; the engine round-trips it through GoogleSQL's literal
      44              : // parser at analysis time.
      45              : struct QueryParameter {
      46              :   // Empty for positional parameters (`?` placeholders in BigQuery SQL).
      47              :   std::string name;
      48              :   // GoogleSQL `TypeKind` name, e.g. "INT64", "STRING".
      49              :   std::string type_kind;
      50              :   std::string value_json;
      51              :   // Gateway-encoded REST `parameterType` descriptor for STRUCT/ARRAY
      52              :   // parameters (field names and nested type kinds); empty for scalars.
      53              :   std::string type_json;
      54              : };
      55              : 
      56              : // One query the engine is asked to plan or execute. The fields mirror
      57              : // `bigquery_emulator.v1.QueryRequest` from `proto/emulator.proto`.
      58              : struct QueryRequest {
      59              :   std::string project_id;
      60              :   // Default dataset for unqualified table references. May be empty.
      61              :   std::string default_dataset_id;
      62              :   std::string sql;
      63              :   std::vector<QueryParameter> parameters;
      64              :   // BigQuery defaults `useLegacySql` to true on the wire; the gateway
      65              :   // rejects that case (see the gateway-HTTP-surface section of
      66              :   // ROADMAP.md) so by the time a request reaches here this field
      67              :   // should always be false. We keep it as a belt-and-braces field so
      68              :   // the engine can also error out if the gateway ever stops enforcing
      69              :   // that rejection.
      70              :   bool use_legacy_sql = false;
      71              :   // Synthetic principal from the gateway; defaults to
      72              :   // catalog::kEmulatorPrincipalEmail when empty.
      73              :   std::string principal_email;
      74              :   // Optional per-query phase recorder populated by the frontend and
      75              :   // filled by coordinator / executor paths for loopback diagnostics.
      76              :   PhaseRecorderPtr phase_recorder;
      77              : };
      78              : 
      79              : // Opaque handle for a parsed + name-resolved query, returned by
      80              : // `Engine::Analyze`. The DuckDB engine hides its own ResolvedAST plus
      81              : // any side state (extracted parameters, default dataset, etc.) here.
      82              : class AnalyzedQuery {
      83              :  public:
      84              :   virtual ~AnalyzedQuery();
      85              : 
      86              :   // The schema of the rows the query will produce on
      87              :   // `Engine::ExecuteQuery`.
      88              :   virtual const schema::TableSchema& output_schema() const = 0;
      89              : };
      90              : 
      91              : // Streamed query result. The engine produces rows one at a time;
      92              : // `Next` returns false on end-of-stream. The DuckDB engine batches
      93              : // internally and streams rows out one-by-one.
      94              : class RowSource {
      95              :  public:
      96              :   virtual ~RowSource();
      97              : 
      98              :   virtual const schema::TableSchema& schema() const = 0;
      99              : 
     100              :   // Pulls the next row into `*row`. An OK result holds:
     101              :   //   * `true`  - a row was written.
     102              :   //   * `false` - end of stream; `*row` is unchanged.
     103              :   // A non-OK status indicates an execution error; the behavior of
     104              :   // any further call is undefined.
     105              :   virtual absl::StatusOr<bool> Next(storage::Row* row) = 0;
     106              : };
     107              : 
     108              : // Result of `Engine::DryRun`. Mirrors the BigQuery
     109              : // `Job.statistics.query.{schema,totalBytesProcessed}` shape the
     110              : // gateway exposes on `jobs.query?dryRun=true`.
     111              : struct DryRunResult {
     112              :   schema::TableSchema schema;
     113              :   int64_t estimated_bytes_processed = 0;
     114              : };
     115              : 
     116              : // Per-statement modification counts for an INSERT / UPDATE / DELETE /
     117              : // MERGE statement, carried as `DmlResult::stats`. Mirrors the
     118              : // BigQuery REST `Job.statistics.query.dmlStats` envelope; the
     119              : // frontend handler folds these counts into a final
     120              : // `QueryResultRow.dml_stats` message on the `Query.ExecuteQuery`
     121              : // stream.
     122              : struct DmlStats {
     123              :   // Number of rows added by INSERT / MERGE-INSERT branches.
     124              :   int64_t inserted_row_count = 0;
     125              :   // Number of rows updated by UPDATE / MERGE-UPDATE branches.
     126              :   int64_t updated_row_count = 0;
     127              :   // Number of rows removed by DELETE / MERGE-DELETE branches.
     128              :   int64_t deleted_row_count = 0;
     129              : };
     130              : 
     131              : // Full result of `Engine::ExecuteDml`: modification counts plus the
     132              : // optional `THEN RETURN` rows. `returning_rows` is non-null only when
     133              : // the resolved AST includes `ResolvedReturningClause`; the frontend
     134              : // streams its schema + rows before the trailing `dml_stats` message.
     135              : struct DmlResult {
     136              :   DmlStats stats;
     137              :   std::unique_ptr<RowSource> returning_rows;
     138              : };
     139              : 
     140              : // Engine is the abstract interface every query backend implements.
     141              : //
     142              : // Lifetime: created once at startup with a `Storage*` and a
     143              : // `googlesql::Catalog*` already wired up; shared by every gRPC
     144              : // request handler. All methods are thread-safe.
     145              : class Engine {
     146              :  public:
     147              :   virtual ~Engine();
     148              : 
     149              :   // Parse + name-resolve + type-check `request.sql` against `catalog`.
     150              :   // Returns an opaque `AnalyzedQuery` exposing the output schema
     151              :   // (`DryRun` / `ExecuteQuery` take the `QueryRequest` again, not
     152              :   // this handle), OR a parse / analysis error mapped to the matching
     153              :   // absl::Status code (the gateway translates that into a BigQuery
     154              :   // error envelope; see the analyzer integration section of ROADMAP.md).
     155              :   //
     156              :   // `[[nodiscard]]` is on every Status / StatusOr-returning method
     157              :   // here for the same reason it is on `backend::storage::Storage`:
     158              :   // dropping the result silently swallows a parse / analysis error
     159              :   // that the gateway has no other channel to surface.
     160              :   [[nodiscard]] virtual absl::StatusOr<std::unique_ptr<AnalyzedQuery>> Analyze(
     161              :       const QueryRequest& request, googlesql::Catalog* catalog) = 0;
     162              : 
     163              :   // Plan-only path used by `jobs.query?dryRun=true`. Implementations
     164              :   // are free to short-circuit through `Analyze` internally.
     165              :   [[nodiscard]] virtual absl::StatusOr<DryRunResult> DryRun(
     166              :       const QueryRequest& request, googlesql::Catalog* catalog) = 0;
     167              : 
     168              :   // Plan + execute. The returned `RowSource` streams the result rows
     169              :   // back to the gateway one by one; the gateway paginates them out
     170              :   // through the `bigquery.jobs.query` and
     171              :   // `bigquery.jobs.getQueryResults` REST endpoints.
     172              :   [[nodiscard]] virtual absl::StatusOr<std::unique_ptr<RowSource>> ExecuteQuery(
     173              :       const QueryRequest& request, googlesql::Catalog* catalog) = 0;
     174              : 
     175              :   // Plan + execute a DML statement (INSERT / UPDATE / DELETE / MERGE)
     176              :   // and return the per-statement modification counts. The engine is
     177              :   // expected to apply the changes to the underlying `Storage` it was
     178              :   // constructed with -- callers see only the count summary the
     179              :   // gateway folds into BigQuery's `dmlStats` / `numDmlAffectedRows`
     180              :   // fields, plus `DmlResult::returning_rows` when set. This default
     181              :   // returns `absl::StatusCode::kUnimplemented` for engines without
     182              :   // DML; the frontend handler maps that to gRPC `UNIMPLEMENTED` so
     183              :   // the gateway can surface BigQuery's `notImplemented` reason.
     184              :   [[nodiscard]] virtual absl::StatusOr<DmlResult> ExecuteDml(
     185            0 :       const QueryRequest& request, googlesql::Catalog* catalog) {
     186            0 :     (void)request;
     187            0 :     (void)catalog;
     188            0 :     return absl::UnimplementedError(
     189            0 :         "Engine::ExecuteDml is not implemented in this engine");
     190            0 :   }
     191              : 
     192              :   // Plan + execute a DDL statement
     193              :   // (CREATE TABLE / CREATE TABLE AS SELECT / DROP TABLE / ALTER TABLE
     194              :   // ADD COLUMN). The engine mutates the underlying `Storage` -- there
     195              :   // is no row-shaped reply, just success (OK) or a status mapped to
     196              :   // the matching gRPC code. This default implementation returns
     197              :   // `absl::StatusCode::kUnimplemented`; the frontend handler maps
     198              :   // that to gRPC `UNIMPLEMENTED`.
     199              :   [[nodiscard]] virtual absl::Status ExecuteDdl(const QueryRequest& request,
     200            0 :                                                 googlesql::Catalog* catalog) {
     201            0 :     (void)request;
     202            0 :     (void)catalog;
     203            0 :     return absl::UnimplementedError(
     204            0 :         "Engine::ExecuteDdl is not implemented in this engine");
     205            0 :   }
     206              : };
     207              : 
     208              : }  // namespace engine
     209              : }  // namespace backend
     210              : }  // namespace bigquery_emulator
     211              : 
     212              : #endif  // BIGQUERY_EMULATOR_BACKEND_ENGINE_ENGINE_H_
        

Generated by: LCOV version 2.0-1