package main
import (
"context"
"errors"
"flag"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/bench/runner"
)
func main() {
var (
targetFlag = flag.String("target", "emulator", "emulator, goccy, bigquery, or all")
casesDir = flag.String("cases", defaultCasesDir(), "directory of benchmark YAML cases")
baselinePath = flag.String("baseline", defaultBaselinePath(), "path to bigquery.json baseline")
capture = flag.Bool("capture", false, "capture baseline (requires --target=bigquery)")
compare = flag.Bool("compare", false, "compare emulator results to committed baseline")
jsonOut = flag.String("json-out", "", "write machine-readable results JSON")
project = flag.String("project", os.Getenv("BENCH_BQ_PROJECT"), "BigQuery project for capture")
goccyImage = flag.String("goccy-image", "", "docker image for goccy emulator")
caseFilter = flag.String("case", "", "run a single case by name")
engineBin = flag.String("engine-binary", "", "path to emulator_main")
skipGoccy = flag.Bool(
"skip-goccy",
os.Getenv("BENCH_SKIP_GOCCY") == "1",
"skip goccy target when --target=all",
)
quiet = flag.Bool("quiet", false, "suppress per-case progress logging on stderr")
)
flag.Parse()
if err := run(context.Background(), config{
target: *targetFlag,
casesDir: *casesDir,
baselinePath: *baselinePath,
capture: *capture,
compare: *compare,
jsonOut: *jsonOut,
project: *project,
goccyImage: *goccyImage,
caseFilter: *caseFilter,
engineBin: *engineBin,
skipGoccy: *skipGoccy,
quiet: *quiet,
}); err != nil {
fmt.Fprintf(os.Stderr, "bench: %v\n", err)
os.Exit(1)
}
}
// config carries the parsed command-line settings from main into run and the
// helpers it calls. Each field mirrors one flag.
type config struct {
// target is the --target value; resolveTargets turns it into runner targets.
target string
// casesDir is the directory of benchmark YAML cases (--cases).
casesDir string
// baselinePath is the path of the bigquery.json baseline (--baseline).
baselinePath string
// capture requests writing a baseline from this run (--capture).
capture bool
// compare requests comparing emulator results to the baseline (--compare).
compare bool
// jsonOut is the path for machine-readable results; empty disables it (--json-out).
jsonOut string
// project is the BigQuery project; defaults to $BENCH_BQ_PROJECT (--project).
project string
// goccyImage overrides the goccy docker image when non-empty (--goccy-image).
goccyImage string
// caseFilter restricts the run to a single case by name (--case).
caseFilter string
// engineBin is the path to emulator_main (--engine-binary).
engineBin string
// skipGoccy drops the goccy target when --target=all (--skip-goccy).
skipGoccy bool
// quiet suppresses per-case progress logging on stderr (--quiet).
quiet bool
}
// run performs one benchmark invocation: it resolves the requested targets,
// loads the baseline when one is needed, executes the cases through
// runner.Run, writes the text/JSON/baseline outputs and finally applies the
// compare gate. The first error encountered is returned unchanged.
func run(ctx context.Context, cfg config) error {
	targets, err := resolveTargets(cfg.target, runner.TargetOptions{
		EngineBinary: cfg.engineBin,
		GoccyImage:   cfg.goccyImage,
		BQProject:    cfg.project,
	}, cfg.skipGoccy)
	if err != nil {
		return err
	}

	baseline := loadBaseline(cfg)

	// A nil progress callback is what --quiet hands to the runner.
	var progress func(format string, args ...any)
	if !cfg.quiet {
		progress = func(format string, args ...any) {
			_, _ = fmt.Fprintf(os.Stderr, "%s bench: %s\n",
				time.Now().Format("15:04:05"), fmt.Sprintf(format, args...))
		}
	}

	runOpts := runner.RunOptions{
		CasesDir:   cfg.casesDir,
		CaseFilter: cfg.caseFilter,
		Targets:    targets,
		Baseline:   baseline,
		Compare:    cfg.compare,
		Progress:   progress,
	}
	report, err := runner.Run(ctx, runOpts)
	if err != nil {
		return err
	}

	if outErr := writeRunOutputs(cfg, targets, report, baseline); outErr != nil {
		return outErr
	}
	return enforceCompareGate(cfg, report)
}
// loadBaseline returns the baseline to compare against, or nil when no
// comparison will happen (capture mode, or --compare not set).
//
// In compare mode a baseline that cannot be loaded is replaced by an empty,
// non-nil baseline so the run proceeds with no golden cases instead of
// aborting; the load error itself is intentionally not surfaced here.
func loadBaseline(cfg config) *runner.BaselineFile {
	if cfg.capture || !cfg.compare {
		return nil
	}
	loaded, err := runner.LoadBaseline(cfg.baselinePath)
	if err != nil {
		return &runner.BaselineFile{Cases: map[string]runner.BaselineCase{}}
	}
	return &loaded
}
// writeRunOutputs emits everything a finished run produces:
//
//  1. records the goccy image on the report (the explicit --goccy-image, or
//     the default image tag when a goccy target actually ran),
//  2. prints the text report to stdout,
//  3. saves the JSON report when --json-out is set, and
//  4. in capture mode, builds a baseline from the results and writes it to
//     cfg.baselinePath.
//
// A capture is merged into the existing baseline so a partial capture (e.g.
// --case create_view_100k) updates only the cases that ran. If the existing
// baseline exists but cannot be read or parsed, an error is returned rather
// than overwriting it, because overwriting would silently discard every case
// that was not part of this run. A missing file is fine: a new one is created.
func writeRunOutputs(
	cfg config,
	targets []runner.Target,
	report runner.RunReport,
	baseline *runner.BaselineFile,
) error {
	if cfg.goccyImage != "" {
		report.GoccyImage = cfg.goccyImage
	} else if !cfg.skipGoccy && containsTarget(targets, runner.TargetGoccy) {
		report.GoccyImage = runner.ImageTag(runner.DefaultGoccyImage())
	}
	runner.PrintTextReport(os.Stdout, report, baseline)
	if err := saveJSONReport(cfg, report); err != nil {
		return err
	}
	if !cfg.capture {
		return nil
	}
	if cfg.project == "" {
		return errors.New("--project or BENCH_BQ_PROJECT required for capture")
	}
	b := runner.BuildBaselineFromResults(cfg.project, report.Results)
	existing, loadErr := runner.LoadBaseline(cfg.baselinePath)
	switch {
	case loadErr == nil:
		b = runner.MergeBaseline(existing, b)
	case !errors.Is(loadErr, os.ErrNotExist):
		// Corrupt or unreadable baseline: do not clobber it.
		return fmt.Errorf("refusing to overwrite unreadable baseline %s: %w", cfg.baselinePath, loadErr)
	}
	if err := runner.SaveBaseline(cfg.baselinePath, b); err != nil {
		return err
	}
	_, _ = fmt.Fprintf(os.Stdout, "wrote baseline %s (%d cases)\n", cfg.baselinePath, len(b.Cases))
	return nil
}
// saveJSONReport writes the report to cfg.jsonOut; it does nothing when no
// JSON output path was requested.
//
// When a single case was selected with --case the run is a partial rerun, so
// the new results are merged into the previously saved report (if one can be
// loaded) and a short confirmation line is printed to stdout.
func saveJSONReport(cfg config, report runner.RunReport) error {
	if cfg.jsonOut == "" {
		return nil
	}

	partial := cfg.caseFilter != ""
	out := report
	if partial {
		// A load failure simply means there is nothing to merge with.
		if previous, err := runner.LoadReport(cfg.jsonOut); err == nil {
			out = runner.MergeReport(previous, report)
		}
	}

	if err := runner.SaveReport(cfg.jsonOut, out); err != nil {
		return err
	}
	if partial {
		_, _ = fmt.Fprintf(os.Stdout, "merged results into %s (%d rows)\n",
			cfg.jsonOut, len(out.Results))
	}
	return nil
}
// enforceCompareGate turns failed comparisons into a process-level failure.
// It is a no-op unless --compare is set. Only emulator results whose Pass
// verdict is present and false count as failures; results without a verdict
// are ignored.
func enforceCompareGate(cfg config, report runner.RunReport) error {
	if !cfg.compare {
		return nil
	}

	var failed int
	for i := range report.Results {
		res := &report.Results[i]
		if res.Target != runner.TargetEmulator {
			continue
		}
		if res.Pass == nil || *res.Pass {
			continue
		}
		failed++
	}

	if failed == 0 {
		return nil
	}
	return fmt.Errorf("%d emulator case(s) failed compare gate", failed)
}
// resolveTargets maps the --target value (case-insensitive, surrounding
// whitespace ignored) to the targets to run:
//
//   - "emulator" or "compare": the in-repo emulator only
//   - "goccy": the goccy emulator only
//   - "bigquery" or "bq": real BigQuery only
//   - "all": the emulator plus goccy (goccy omitted when skipGoccy is set);
//     BigQuery is not part of "all"
//
// Any other value yields an "unknown target" error quoting the normalized name.
func resolveTargets(name string, opts runner.TargetOptions, skipGoccy bool) ([]runner.Target, error) {
	name = strings.ToLower(strings.TrimSpace(name))

	switch name {
	case "emulator", "compare":
		return []runner.Target{runner.NewEmulatorTarget(opts)}, nil
	case "goccy":
		return []runner.Target{runner.NewGoccyTarget(opts)}, nil
	case "bigquery", "bq":
		return []runner.Target{runner.NewBigQueryTarget(opts)}, nil
	case "all":
		targets := []runner.Target{runner.NewEmulatorTarget(opts)}
		if !skipGoccy {
			targets = append(targets, runner.NewGoccyTarget(opts))
		}
		return targets, nil
	}
	return nil, fmt.Errorf("unknown target %q", name)
}
// containsTarget reports whether any element of targets identifies itself
// with the given name.
func containsTarget(targets []runner.Target, name runner.TargetName) bool {
	for i := range targets {
		if targets[i].Name() == name {
			return true
		}
	}
	return false
}
// defaultCasesDir returns <repo root>/bench/cases, falling back to the
// relative path "bench/cases" when the repository root cannot be located.
func defaultCasesDir() string {
	if root, err := repoRoot(); err == nil {
		return filepath.Join(root, "bench", "cases")
	}
	return "bench/cases"
}
// defaultBaselinePath returns <repo root>/bench/baselines/bigquery.json,
// falling back to the relative path "bench/baselines/bigquery.json" when the
// repository root cannot be located.
func defaultBaselinePath() string {
	if root, err := repoRoot(); err == nil {
		return filepath.Join(root, "bench", "baselines", "bigquery.json")
	}
	return "bench/baselines/bigquery.json"
}
// repoRoot walks upward from the current working directory and returns the
// first directory that contains a go.mod file. It fails if the working
// directory cannot be determined or if the filesystem root is reached without
// finding go.mod.
func repoRoot() (string, error) {
	start, err := os.Getwd()
	if err != nil {
		return "", err
	}
	for cur := start; ; {
		if _, statErr := os.Stat(filepath.Join(cur, "go.mod")); statErr == nil {
			return cur, nil
		}
		up := filepath.Dir(cur)
		if up == cur {
			// filepath.Dir is a fixed point only at the root.
			return "", fmt.Errorf("go.mod not found from %s", start)
		}
		cur = up
	}
}
package runner
import (
"encoding/json"
"fmt"
"maps"
"os"
"time"
)
// BaselineFile is the committed golden BigQuery capture. It is the JSON
// document read by LoadBaseline and written by SaveBaseline.
type BaselineFile struct {
// CapturedAt is when the capture was taken; SaveBaseline normalizes it to UTC.
CapturedAt time.Time `json:"captured_at"`
// Project is the BigQuery project the capture ran against.
Project string `json:"project"`
// ProjectHash is optional and omitted from the JSON when empty.
ProjectHash string `json:"project_hash,omitempty"`
// Cases maps a case name to its golden numbers.
Cases map[string]BaselineCase `json:"cases"`
}
// BaselineCase holds golden latency and correctness for one case, as filled
// in by BuildBaselineFromResults from a successful BigQuery result.
type BaselineCase struct {
// ContentHash is the case's content hash at capture time; CompareToBaseline
// reports a stale baseline when it no longer matches the case.
ContentHash string `json:"content_hash"`
// TotalP50MS is the p50 of the measured latency, in milliseconds.
TotalP50MS int64 `json:"total_p50_ms"`
// ExecutionP50MS is the p50 execution time, in milliseconds.
ExecutionP50MS int64 `json:"execution_p50_ms"`
// QueueP50MS is the p50 queue time, in milliseconds.
QueueP50MS int64 `json:"queue_p50_ms,omitempty"`
// TotalSlotMsP50 is the p50 slot-milliseconds figure copied from the result.
TotalSlotMsP50 int64 `json:"total_slot_ms_p50,omitempty"`
// BytesProcessed is the bytes-processed figure copied from the result.
BytesProcessed int64 `json:"bytes_processed,omitempty"`
// ResultHash is the hash of the query's result rows.
ResultHash string `json:"result_hash"`
// RowCount is the number of rows the query returned.
RowCount int `json:"row_count"`
}
// LoadBaseline reads and decodes the baseline JSON file at path (normally
// bench/baselines/bigquery.json). Read and decode errors are returned as-is
// together with a zero BaselineFile. On success the Cases map is never nil,
// so callers may index or insert into it directly.
func LoadBaseline(path string) (BaselineFile, error) {
	data, err := os.ReadFile(path) //nolint:gosec // baseline path is CLI-controlled
	if err != nil {
		return BaselineFile{}, err
	}

	var file BaselineFile
	if err = json.Unmarshal(data, &file); err != nil {
		return BaselineFile{}, err
	}
	if file.Cases == nil {
		file.Cases = make(map[string]BaselineCase)
	}
	return file, nil
}
// SaveBaseline serializes b as indented JSON followed by a trailing newline
// and writes it to path with mode 0o644. The capture timestamp is converted
// to UTC before encoding; b is received by value, so the caller's copy is
// left untouched.
//
//nolint:gosec // baseline path and 0o644 mode are CLI-controlled benchmark artifacts.
func SaveBaseline(path string, b BaselineFile) error {
	b.CapturedAt = b.CapturedAt.UTC()

	encoded, err := json.MarshalIndent(b, "", " ")
	if err != nil {
		return err
	}
	encoded = append(encoded, '\n')
	return os.WriteFile(path, encoded, 0o644)
}
// BuildBaselineFromResults constructs a baseline from a benchmark run.
// Only results produced by the BigQuery target with an OK outcome are
// included; everything else is skipped. The baseline is stamped with the
// current UTC time and the given project.
func BuildBaselineFromResults(project string, results []CaseResult) BaselineFile {
	capturedAt := time.Now().UTC()
	cases := make(map[string]BaselineCase)

	for i := range results {
		res := &results[i]
		if res.Target == TargetBigQuery && res.Outcome == OutcomeOK {
			cases[res.CaseName] = BaselineCase{
				ContentHash:    res.ContentHash,
				TotalP50MS:     res.Latency.P50.Milliseconds(),
				ExecutionP50MS: res.ExecutionP50.Milliseconds(),
				QueueP50MS:     res.QueueP50.Milliseconds(),
				TotalSlotMsP50: res.TotalSlotMsP50,
				BytesProcessed: res.BytesProcessed,
				ResultHash:     res.ResultHash,
				RowCount:       res.RowCount,
			}
		}
	}

	return BaselineFile{
		CapturedAt: capturedAt,
		Project:    project,
		Cases:      cases,
	}
}
// MergeBaseline overlays fresh capture results onto an existing baseline,
// preserving cases that were not part of this run. This makes partial
// captures (e.g. --case create_view_100k) update or add only the cases that
// ran, instead of discarding every other case in the file.
//
// The captured-at timestamp is always taken from the fresh capture; the
// project and project hash are taken from it when non-empty. On a case-name
// collision the fresh entry wins.
//
// The returned baseline owns a newly allocated Cases map: neither
// existing.Cases nor fresh.Cases is modified, so callers can keep using
// their arguments after the call.
func MergeBaseline(existing, fresh BaselineFile) BaselineFile {
	out := existing
	// Copy into a fresh map; assigning the struct alone would share (and then
	// mutate) the caller's existing.Cases.
	out.Cases = make(map[string]BaselineCase, len(existing.Cases)+len(fresh.Cases))
	maps.Copy(out.Cases, existing.Cases)
	maps.Copy(out.Cases, fresh.Cases)

	out.CapturedAt = fresh.CapturedAt
	if fresh.Project != "" {
		out.Project = fresh.Project
	}
	if fresh.ProjectHash != "" {
		out.ProjectHash = fresh.ProjectHash
	}
	return out
}
// CompareToBaseline checks an emulator result against the golden baseline
// entry for the same case and returns a verdict plus, on failure, a reason.
//
// Checks run in this order:
//  1. the baseline must not be stale (its content hash, when set, must match
//     the case's current hash);
//  2. the result's outcome must be OK (a wrong-result outcome gets its own
//     message, any other outcome is reported with the result's error);
//  3. the result's comparison latency must not exceed the threshold, which
//     is the larger of the case's max_ms and baseline latency × max_ratio
//     (the ratio term only applies when the baseline latency is positive).
func CompareToBaseline(c Case, base BaselineCase, r CaseResult) (pass bool, reason string) {
	if base.ContentHash != "" && base.ContentHash != c.ContentHash {
		return false, fmt.Sprintf("stale baseline (case changed): want hash %s got %s", base.ContentHash, c.ContentHash)
	}

	switch r.Outcome {
	case OutcomeOK:
		// Fall through to the latency check below.
	case OutcomeWrongResult:
		return false, "wrong result vs baseline hash"
	default:
		return false, string(r.Outcome) + ": " + r.Error
	}

	observed := r.CompareLatencyP50()
	limit := time.Duration(c.MaxMS) * time.Millisecond
	if bqMS := base.LatencyP50ForRatio(); bqMS > 0 {
		bq := time.Duration(bqMS) * time.Millisecond
		// The ratio can only loosen the absolute cap, never tighten it.
		limit = max(limit, time.Duration(float64(bq)*c.MaxRatio))
	}

	if observed <= limit {
		return true, ""
	}
	return false, fmt.Sprintf("p50 %s > threshold %s (bq execution p50 %dms, ratio %.2f)",
		observed, limit, base.LatencyP50MS(), c.MaxRatio)
}
package runner
import (
"context"
"errors"
"fmt"
"math/rand"
"net/http"
"strconv"
"strings"
"time"
"cloud.google.com/go/bigquery"
"google.golang.org/api/googleapi"
"google.golang.org/api/iterator"
)
// Exponential-backoff knobs for BigQuery rate-limit/quota retries. These
// are vars (not consts) so tests can shrink the waits. DDL-heavy cases
// (CREATE OR REPLACE TABLE/VIEW on the same object) trip BigQuery's
// per-table metadata-update quota, which only clears over several seconds;
// backoff spreads retries until the window reopens.
//
// bqAttemptTimeout caps a single submission attempt. The BigQuery client
// retries jobRateLimitExceeded *internally* on the call's context until that
// context is done (see runWithRetryExplicit in the client), so without a
// per-attempt cap the client's own retry consumes the entire per-query
// deadline and our backoff loop never runs (the "0 retries" symptom). Capping
// each attempt hands control back to this loop while still leaving the client's
// short internal backoff intact within the slice. It must exceed the slowest
// legitimate query/setup in the suite (a few seconds) by a wide margin.
var (
// bqBaseBackoff is the upper bound of the first jittered wait; it doubles
// after every retry.
bqBaseBackoff = 1 * time.Second
// bqMaxBackoff is the ceiling the doubling backoff is clamped to.
bqMaxBackoff = 32 * time.Second
// bqMaxRetries is the number of retries after the first attempt, so at most
// bqMaxRetries+1 attempts are made.
bqMaxRetries = 8
// bqAttemptTimeout is the per-attempt context timeout (see above).
bqAttemptTimeout = 30 * time.Second
)
// isNotFound reports whether err wraps a googleapi.Error carrying HTTP
// status 404, which is how BigQuery signals an absent dataset.
func isNotFound(err error) bool {
	var apiErr *googleapi.Error
	if !errors.As(err, &apiErr) {
		return false
	}
	return apiErr.Code == http.StatusNotFound
}
// BigQueryTarget runs cases against real BigQuery via ADC.
type BigQueryTarget struct {
// opts holds the options handed to NewBigQueryTarget.
opts TargetOptions
// client is created by Start and closed by Cleanup; nil before Start.
client *bigquery.Client
// project is the BigQuery project, copied from opts.BQProject in Start.
project string
// datasets lists the dataset IDs created by SetupCase so Cleanup can delete them.
datasets []string
// location is opts.BQLocation, or "US" when that is empty.
location string
}
// NewBigQueryTarget returns a BigQueryTarget that remembers opts. No client
// is created here; that happens in Start.
func NewBigQueryTarget(opts TargetOptions) *BigQueryTarget {
return &BigQueryTarget{opts: opts}
}
// Name identifies this target as TargetBigQuery.
func (t *BigQueryTarget) Name() TargetName { return TargetBigQuery }
// Start validates the options and creates the BigQuery client. A project is
// mandatory; the location defaults to "US" when none was configured. Client
// construction errors are wrapped with a "bigquery.NewClient" prefix.
func (t *BigQueryTarget) Start(ctx context.Context) error {
	project := t.opts.BQProject
	if project == "" {
		return errors.New("BENCH_BQ_PROJECT or --project is required for bigquery target")
	}
	t.project = project

	t.location = "US"
	if loc := t.opts.BQLocation; loc != "" {
		t.location = loc
	}

	client, err := bigquery.NewClient(ctx, project)
	if err != nil {
		return fmt.Errorf("bigquery.NewClient: %w", err)
	}
	t.client = client
	return nil
}
// ProjectID returns the billing project for BigQuery runs. It is empty until
// Start has copied it from the target options.
func (t *BigQueryTarget) ProjectID() string { return t.project }
// SetupCase prepares a clean dataset for one case and runs its setup SQL.
//
// The dataset argument may be qualified with "<project>."; that prefix is
// stripped to obtain the dataset ID. Any dataset left over from a previous
// (interrupted) run is deleted first, with "not found" being the expected,
// ignored outcome. The new dataset gets a 24h default table expiration and is
// recorded so Cleanup can remove it. Setup statements run in order and the
// first failure aborts the setup.
func (t *BigQueryTarget) SetupCase(ctx context.Context, c Case, dataset string) error {
	dsID := strings.TrimPrefix(dataset, t.project+".")
	ds := t.client.Dataset(dsID)

	if err := ds.DeleteWithContents(ctx); err != nil && !isNotFound(err) {
		return fmt.Errorf("delete stale dataset %s: %w", dsID, err)
	}

	err := ds.Create(ctx, &bigquery.DatasetMetadata{
		Location:               t.location,
		DefaultTableExpiration: 24 * time.Hour,
	})
	if err != nil {
		return fmt.Errorf("create dataset %s: %w", dsID, err)
	}
	t.datasets = append(t.datasets, dsID)

	statements, _ := c.Substitute(dataset, t.project)
	for i := range statements {
		if err := t.runSQL(ctx, statements[i]); err != nil {
			return err
		}
	}
	return nil
}
// RunQuery executes sql once on BigQuery under timedQuery and reports the
// server-side job statistics alongside the hashed result rows.
//
// A non-positive timeout falls back to defaultTimeoutMS. The run fails if the
// job cannot be submitted or completed, if job statistics are missing, or if
// BigQuery served the query from cache (a cache hit would make the timing
// meaningless). When reading rows fails after the job finished, the returned
// QueryResult still carries the execution/queue/slot metrics.
func (t *BigQueryTarget) RunQuery(ctx context.Context, c Case, sql string, timeout time.Duration) (QueryResult, error) {
	if timeout <= 0 {
		timeout = time.Duration(defaultTimeoutMS) * time.Millisecond
	}

	attempt := func(ctx context.Context) (QueryResult, error) {
		job, err := t.runJob(ctx, sql)
		if err != nil {
			return QueryResult{Error: err.Error()}, err
		}

		metrics, err := extractBQJobMetrics(job.LastStatus())
		if err != nil {
			return QueryResult{Error: err.Error()}, err
		}
		if metrics.cacheHit {
			cacheErr := errors.New("bigquery query cache hit (DisableQueryCache ineffective)")
			return QueryResult{Error: cacheErr.Error()}, cacheErr
		}

		// timed is the part of the result that is known as soon as the job
		// statistics are in; both the failure and success paths build on it.
		timed := QueryResult{
			ExecutionOnly:  metrics.execution,
			ExecutionValid: true,
			QueueOnly:      metrics.queue,
			SlotMs:         metrics.slotMs,
		}
		failWith := func(cause error) (QueryResult, error) {
			res := timed
			res.Error = cause.Error()
			return res, cause
		}

		it, err := job.Read(ctx)
		if err != nil {
			return failWith(err)
		}
		rows, err := readAllRows(it)
		if err != nil {
			return failWith(err)
		}

		converted := bqRowsToMaps(rows)
		hash, _ := HashRows(converted)

		res := timed
		res.BytesProcessed = metrics.bytesProcessed
		res.Rows = converted
		res.RowCount = len(converted)
		res.ResultHash = hash
		return res, nil
	}

	return timedQuery(ctx, attempt, timeout)
}
// Cleanup deletes every dataset recorded by SetupCase and then closes the
// client. It is a no-op when Start never created a client.
//
// Deletion is attempted for all datasets even if some fail, so a single
// failure does not leave the remaining benchmark datasets behind in the
// project. All delete failures, plus any error from closing the client, are
// combined with errors.Join; the result is nil when everything succeeded.
func (t *BigQueryTarget) Cleanup(ctx context.Context) error {
	if t.client == nil {
		return nil
	}

	var errs []error
	for _, ds := range t.datasets {
		if err := t.client.Dataset(ds).DeleteWithContents(ctx); err != nil {
			errs = append(errs, fmt.Errorf("delete dataset %s: %w", ds, err))
		}
	}
	if err := t.client.Close(); err != nil {
		errs = append(errs, err)
	}
	return errors.Join(errs...)
}
// runSQL runs one statement to completion and returns the first error seen:
// a submission/retry error from runJob, an error from waiting on the job, or
// the job's own terminal error.
func (t *BigQueryTarget) runSQL(ctx context.Context, sql string) error {
	job, err := t.runJob(ctx, sql)
	if err != nil {
		return err
	}
	status, waitErr := job.Wait(ctx)
	if waitErr != nil {
		return waitErr
	}
	return status.Err()
}
// runJob submits sql through runJobOnce, wrapped in retryOnRateLimit so that
// rate-limit/quota/backend errors are retried with backoff. The context each
// attempt receives is the per-attempt context created by retryOnRateLimit.
func (t *BigQueryTarget) runJob(ctx context.Context, sql string) (*bigquery.Job, error) {
return retryOnRateLimit(ctx, func(ctx context.Context) (*bigquery.Job, error) {
return t.runJobOnce(ctx, sql)
})
}
// runJobOnce submits sql as a single query job in the target's location,
// waits for it to finish and returns the job. The query cache is disabled
// because a cache hit would report ~0ms execution and ruin the benchmark.
// Submission errors, wait errors and the job's terminal error are all
// returned with a nil job.
func (t *BigQueryTarget) runJobOnce(ctx context.Context, sql string) (*bigquery.Job, error) {
	query := t.client.Query(sql)
	query.DisableQueryCache = true
	query.Location = t.location

	job, err := query.Run(ctx)
	if err != nil {
		return nil, err
	}

	status, err := job.Wait(ctx)
	if err == nil {
		// The wait itself succeeded; surface the job's own failure, if any.
		err = status.Err()
	}
	if err != nil {
		return nil, err
	}
	return job, nil
}
// isRateLimitErr reports whether err is a BigQuery throttling/quota or
// transient backend error worth retrying with backoff.
//
// Three signals are checked, in order:
//  1. a googleapi.Error with HTTP status 429, 500, 502 or 503;
//  2. a googleapi.Error whose structured error items name a retryable reason;
//  3. the lower-cased error text containing a known reason fragment. This
//     covers HTTP 400 responses whose only signal is the message, such as the
//     "Job exceeded rate limits: ... jobRateLimitExceeded" seen on repeated
//     CREATE OR REPLACE statements.
//
// A nil error is never retryable.
func isRateLimitErr(err error) bool {
	if err == nil {
		return false
	}

	var apiErr *googleapi.Error
	if errors.As(err, &apiErr) {
		code := apiErr.Code
		if code == http.StatusTooManyRequests || // 429
			code == http.StatusInternalServerError || // 500
			code == http.StatusBadGateway || // 502
			code == http.StatusServiceUnavailable { // 503
			return true
		}
		for _, item := range apiErr.Errors {
			switch item.Reason {
			case "rateLimitExceeded", "jobRateLimitExceeded",
				"quotaExceeded", "backendError", "internalError":
				return true
			}
		}
	}

	lowered := strings.ToLower(err.Error())
	fragments := [...]string{
		"ratelimitexceeded",
		"jobratelimitexceeded",
		"exceeded rate limits",
		"exceeded quota",
		"quotaexceeded",
		"backenderror",
	}
	for i := range fragments {
		if strings.Contains(lowered, fragments[i]) {
			return true
		}
	}
	return false
}
// retryOnRateLimit runs fn, retrying rate-limit/quota/backend errors with
// exponential backoff and full jitter until success, a non-retryable error,
// the retry budget is exhausted, or ctx expires.
//
// Each attempt runs against a sub-context capped at bqAttemptTimeout so the
// BigQuery client's internal retryer cannot consume the whole parent deadline
// before this loop gets to back off (context.WithTimeout also caps at the
// parent's own deadline, so we never exceed the per-query budget).
//
// The first attempt is attempt 0, so up to bqMaxRetries+1 attempts are made.
// On success the job returned by fn is passed through. A non-retryable error
// is returned unwrapped; the exhausted/parent-done/backoff-aborted errors
// wrap the last error from fn.
func retryOnRateLimit(
ctx context.Context,
fn func(context.Context) (*bigquery.Job, error),
) (*bigquery.Job, error) {
// backoff is the current upper bound for the jittered wait.
backoff := bqBaseBackoff
for attempt := 0; ; attempt++ {
attemptCtx, cancel := context.WithTimeout(ctx, bqAttemptTimeout)
job, err := fn(attemptCtx)
// Release the attempt's timer right away instead of deferring inside a loop.
cancel()
if err == nil {
return job, nil
}
// A capped attempt that timed out on the rate-limit reason is still a
// rate-limit error worth backing off on; isRateLimitErr matches the
// reason text the client leaves in the wrapped deadline error.
if !isRateLimitErr(err) {
return nil, err
}
if attempt >= bqMaxRetries {
return nil, fmt.Errorf("rate limit: exhausted %d attempts: %w", attempt+1, err)
}
// Parent deadline/cancellation reached: no budget left to back off.
if cerr := ctx.Err(); cerr != nil {
return nil, fmt.Errorf("rate limit: parent context done after %d attempts: %w (last error: %w)",
attempt+1, cerr, err)
}
// Full jitter: wait in [0, backoff] to avoid synchronized retries.
wait := time.Duration(rand.Int63n(int64(backoff) + 1)) //nolint:gosec // jitter, not crypto
timer := time.NewTimer(wait)
// Sleep for the jittered wait, but give up as soon as the parent context ends.
select {
case <-ctx.Done():
timer.Stop()
return nil, fmt.Errorf("rate limit: backoff aborted after %d attempts: %w (last error: %w)",
attempt+1, ctx.Err(), err)
case <-timer.C:
}
// Double the bound for the next round, clamped to bqMaxBackoff.
if backoff < bqMaxBackoff {
backoff *= 2
if backoff > bqMaxBackoff {
backoff = bqMaxBackoff
}
}
}
}
// bqJobMetrics is the subset of BigQuery job statistics the benchmark
// records, as extracted by extractBQJobMetrics.
type bqJobMetrics struct {
// execution is EndTime minus StartTime of the job.
execution time.Duration
// queue is StartTime minus CreationTime, or zero when that is unavailable.
queue time.Duration
// slotMs is the job's slot usage in milliseconds.
slotMs int64
// cacheHit is true when the query statistics report a cache hit.
cacheHit bool
// bytesProcessed is the job's TotalBytesProcessed.
bytesProcessed int64
}
// extractBQJobMetrics derives timing and usage metrics from a finished job's
// status. It fails when the status or its statistics are missing, or when the
// job has no start or end time.
//
// Queue time is only reported when a creation time exists and the job started
// after it. Cache-hit and slot-millisecond figures come from the query
// statistics when the job is a query; if that leaves slot usage at zero, the
// job-level total slot duration is used instead.
func extractBQJobMetrics(status *bigquery.JobStatus) (bqJobMetrics, error) {
	if status == nil || status.Statistics == nil {
		return bqJobMetrics{}, errors.New("missing BigQuery job statistics")
	}
	stats := status.Statistics
	if stats.StartTime.IsZero() || stats.EndTime.IsZero() {
		return bqJobMetrics{}, errors.New("missing BigQuery startTime or endTime")
	}

	var out bqJobMetrics
	out.execution = stats.EndTime.Sub(stats.StartTime)
	out.bytesProcessed = stats.TotalBytesProcessed

	if created := stats.CreationTime; !created.IsZero() && stats.StartTime.After(created) {
		out.queue = stats.StartTime.Sub(created)
	}
	if query, isQuery := stats.Details.(*bigquery.QueryStatistics); isQuery {
		out.cacheHit = query.CacheHit
		out.slotMs = query.SlotMillis
	}
	if out.slotMs == 0 && stats.TotalSlotDuration > 0 {
		out.slotMs = stats.TotalSlotDuration.Milliseconds()
	}
	return out, nil
}
// readAllRows drains the iterator and returns every row as a column-name to
// value map. iterator.Done ends the read normally; any other error discards
// the rows read so far and is returned. An empty result yields a nil slice.
func readAllRows(it *bigquery.RowIterator) ([]map[string]bigquery.Value, error) {
	var rows []map[string]bigquery.Value
	for {
		var row map[string]bigquery.Value
		switch err := it.Next(&row); {
		case err == nil:
			rows = append(rows, row)
		case errors.Is(err, iterator.Done):
			return rows, nil
		default:
			return nil, err
		}
	}
}
// bqRowsToMaps converts BigQuery rows into string-valued maps, rendering each
// cell with bqValueToString. The result is always non-nil and has one entry
// per input row, in the same order.
func bqRowsToMaps(rows []map[string]bigquery.Value) []map[string]string {
	converted := make([]map[string]string, len(rows))
	for i, row := range rows {
		cells := make(map[string]string, len(row))
		for column, value := range row {
			cells[column] = bqValueToString(value)
		}
		converted[i] = cells
	}
	return converted
}
// bqValueToString renders one BigQuery cell as text: nil becomes the empty
// string, strings pass through, int64 is printed in base 10, float64 uses the
// shortest decimal form that round-trips, and bool is "true"/"false". Any
// other dynamic type is formatted with fmt.Sprint.
func bqValueToString(v bigquery.Value) string {
	switch val := v.(type) {
	case nil:
		return ""
	case string:
		return val
	case int64:
		return strconv.FormatInt(val, 10)
	case float64:
		return strconv.FormatFloat(val, 'f', -1, 64)
	case bool:
		return strconv.FormatBool(val)
	}
	return fmt.Sprint(v)
}
// Compile-time check that *BigQueryTarget satisfies Target.
var _ Target = (*BigQueryTarget)(nil)
package runner
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"time"
"gopkg.in/yaml.v3"
)
// Case is one YAML benchmark definition under bench/cases/. Decoding goes
// through Case.UnmarshalYAML; LoadCase then applies defaults and fills in the
// fields tagged yaml:"-".
type Case struct {
// Name identifies the case; LoadCase defaults it to the file's base name
// without extension.
Name string `yaml:"name"`
// Tags are free-form labels copied from the YAML file.
Tags []string `yaml:"tags,omitempty"`
// SetupSQL holds the non-empty "sql" entries of the YAML "setup" list.
SetupSQL []string `yaml:"-"`
// Query is the benchmarked statement; LoadCase rejects a case without one.
Query string `yaml:"query"`
// Iterations defaults to defaultIterations when not positive.
Iterations int `yaml:"iterations,omitempty"`
// Warmup is clamped by LoadCase to the range [0, Iterations-1].
Warmup int `yaml:"warmup,omitempty"`
// MaxRatio defaults to defaultMaxRatio when not positive.
MaxRatio float64 `yaml:"max_ratio,omitempty"`
// MaxMS is a millisecond cap; it defaults to defaultMaxMS when not positive.
MaxMS int64 `yaml:"max_ms,omitempty"`
// ProjectID defaults to "bench-" + Name when empty.
ProjectID string `yaml:"project_id,omitempty"`
// SkipTargets lists targets that must not run this case (see SkippedFor).
SkipTargets []TargetName `yaml:"skip_targets,omitempty"`
// SkipReason is the explanation reported when a target is skipped.
SkipReason string `yaml:"skip_reason,omitempty"`
// Path is the file the case was loaded from.
Path string `yaml:"-"`
// ContentHash is hashContent of the raw file bytes.
ContentHash string `yaml:"-"`
}
// Defaults applied to benchmark cases and query timeouts.
const (
// defaultIterations is used by LoadCase when iterations is not positive.
defaultIterations = 10
// defaultWarmup is not referenced by LoadCase in this file.
// NOTE(review): presumably applied elsewhere in the package — confirm.
defaultWarmup = 2
// defaultMaxRatio is used by LoadCase when max_ratio is not positive.
defaultMaxRatio = 1.5
// defaultMaxMS (milliseconds) is used by LoadCase when max_ms is not
// positive; QueryTimeout only honors a case's max_ms above this value.
defaultMaxMS = 30_000
// defaultTimeoutMS (milliseconds) is the fallback query timeout.
defaultTimeoutMS = 60_000
)
// LoadCases loads every regular *.yaml file directly inside dir (no
// recursion, and files with other extensions are ignored), in ascending path
// order. The first file that fails to load aborts the whole call. The
// returned slice is non-nil even when dir contains no cases.
func LoadCases(dir string) ([]Case, error) {
	entries, err := os.ReadDir(dir)
	if err != nil {
		return nil, fmt.Errorf("read cases dir %s: %w", dir, err)
	}

	files := make([]string, 0, len(entries))
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		if name := entry.Name(); strings.HasSuffix(name, ".yaml") {
			files = append(files, filepath.Join(dir, name))
		}
	}
	sort.Strings(files)

	cases := make([]Case, len(files))
	for i, file := range files {
		loaded, loadErr := LoadCase(file)
		if loadErr != nil {
			return nil, loadErr
		}
		cases[i] = loaded
	}
	return cases, nil
}
// LoadCase reads and parses one benchmark case file, then normalizes it:
//
//   - Name falls back to the file's base name without extension.
//   - Query is mandatory.
//   - Iterations, MaxRatio and MaxMS fall back to their defaults when not
//     positive.
//   - Warmup is clamped into [0, Iterations-1].
//   - ProjectID falls back to "bench-" + Name.
//
// Path records where the case came from and ContentHash is a hash of the raw
// file contents. Read and parse errors are wrapped with the file path.
func LoadCase(path string) (Case, error) {
	data, err := os.ReadFile(path) //nolint:gosec // case path comes from bench/cases discovery
	if err != nil {
		return Case{}, fmt.Errorf("read %s: %w", path, err)
	}

	var loaded Case
	if err := yaml.Unmarshal(data, &loaded); err != nil {
		return Case{}, fmt.Errorf("parse %s: %w", path, err)
	}

	if loaded.Name == "" {
		base := filepath.Base(path)
		loaded.Name = strings.TrimSuffix(base, filepath.Ext(path))
	}
	if loaded.Query == "" {
		return Case{}, fmt.Errorf("%s: query is required", path)
	}

	if loaded.Iterations <= 0 {
		loaded.Iterations = defaultIterations
	}
	// Clamp warmup: never negative, and always strictly below Iterations.
	loaded.Warmup = max(loaded.Warmup, 0)
	if loaded.Warmup >= loaded.Iterations {
		loaded.Warmup = max(loaded.Iterations-1, 0)
	}

	if loaded.MaxRatio <= 0 {
		loaded.MaxRatio = defaultMaxRatio
	}
	if loaded.MaxMS <= 0 {
		loaded.MaxMS = defaultMaxMS
	}
	if loaded.ProjectID == "" {
		loaded.ProjectID = "bench-" + loaded.Name
	}

	loaded.Path = path
	loaded.ContentHash = hashContent(string(data))
	return loaded, nil
}
// SkippedFor reports whether the case opts out of the given target and, if
// so, why. The reason is the case's skip_reason, or "skipped for <target>"
// when none was provided. For a target that is not skipped the reason is empty.
func (c Case) SkippedFor(target TargetName) (bool, string) {
	if !slices.Contains(c.SkipTargets, target) {
		return false, ""
	}
	if c.SkipReason != "" {
		return true, c.SkipReason
	}
	return true, "skipped for " + string(target)
}
// Substitute expands the {{ds}} and {{project}} placeholders in the case's
// setup statements and query. {{ds}} is replaced first, then {{project}}.
// The returned setup slice is non-nil and has one entry per setup statement,
// in order; the case itself is not modified.
func (c Case) Substitute(dataset, project string) (setup []string, query string) {
	expand := func(sql string) string {
		withDataset := strings.ReplaceAll(sql, "{{ds}}", dataset)
		return strings.ReplaceAll(withDataset, "{{project}}", project)
	}

	setup = make([]string, 0, len(c.SetupSQL))
	for _, stmt := range c.SetupSQL {
		setup = append(setup, expand(stmt))
	}
	query = expand(c.Query)
	return setup, query
}
// UnmarshalYAML decodes a case document. It exists so that the YAML "setup"
// key, a list of {sql: ...} objects, can be flattened into SetupSQL; entries
// with an empty sql value are dropped. All other keys map one-to-one onto the
// Case fields of the same name. Fields that do not come from YAML (Path,
// ContentHash) are left untouched.
func (c *Case) UnmarshalYAML(value *yaml.Node) error {
	type setupStep struct {
		SQL string `yaml:"sql"`
	}
	// document mirrors the on-disk schema; using a separate type avoids
	// recursing back into this method during Decode.
	type document struct {
		Name        string       `yaml:"name"`
		Tags        []string     `yaml:"tags,omitempty"`
		Query       string       `yaml:"query"`
		Iterations  int          `yaml:"iterations,omitempty"`
		Warmup      int          `yaml:"warmup,omitempty"`
		MaxRatio    float64      `yaml:"max_ratio,omitempty"`
		MaxMS       int64        `yaml:"max_ms,omitempty"`
		ProjectID   string       `yaml:"project_id,omitempty"`
		SkipTargets []TargetName `yaml:"skip_targets,omitempty"`
		SkipReason  string       `yaml:"skip_reason,omitempty"`
		Setup       []setupStep  `yaml:"setup"`
	}

	var doc document
	if err := value.Decode(&doc); err != nil {
		return err
	}

	statements := make([]string, 0, len(doc.Setup))
	for i := range doc.Setup {
		if sql := doc.Setup[i].SQL; sql != "" {
			statements = append(statements, sql)
		}
	}

	c.Name, c.Tags, c.Query = doc.Name, doc.Tags, doc.Query
	c.Iterations, c.Warmup = doc.Iterations, doc.Warmup
	c.MaxRatio, c.MaxMS = doc.MaxRatio, doc.MaxMS
	c.ProjectID = doc.ProjectID
	c.SkipTargets, c.SkipReason = doc.SkipTargets, doc.SkipReason
	c.SetupSQL = statements
	return nil
}
// hashContent returns a short fingerprint of s: the first 8 bytes of its
// SHA-256 digest, hex-encoded (16 lowercase hex characters).
func hashContent(s string) string {
	digest := sha256.Sum256([]byte(s))
	prefix := digest[:8]
	return hex.EncodeToString(prefix)
}
// QueryTimeout returns the wall-clock cap for query iterations. The fallback
// is used unless the case sets max_ms above the default cap, in which case
// max_ms (as milliseconds) wins so that slow targets (notably goccy on large
// joins) can finish.
func (c Case) QueryTimeout(fallback time.Duration) time.Duration {
	if c.MaxMS <= defaultMaxMS {
		return fallback
	}
	return time.Duration(c.MaxMS) * time.Millisecond
}
package runner
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"time"
conf "github.com/vantaboard/bigquery-emulator/conformance/runner"
)
// defaultEngineBinary is the engine path resolveEngineBinary falls back to
// when neither BIGQUERY_EMULATOR_BIN nor any candidate path exists.
const defaultEngineBinary = "./bin/emulator_main"
// EmulatorTarget drives the in-repo emulator via an in-process gateway.
type EmulatorTarget struct {
// opts holds the options handed to NewEmulatorTarget.
opts TargetOptions
// env is the running emulator started by Start; nil before Start.
env *conf.EmulatorEnv
// client posts queries to env.BaseURL; created by Start.
client *RESTClient
}
// NewEmulatorTarget returns an EmulatorTarget that remembers opts. The
// emulator itself is only launched by Start.
func NewEmulatorTarget(opts TargetOptions) *EmulatorTarget {
return &EmulatorTarget{opts: opts}
}
// Name identifies this target as TargetEmulator.
func (t *EmulatorTarget) Name() TargetName { return TargetEmulator }
// Start launches the emulator with the DuckDB conformance profile and creates
// the REST client that talks to it. The engine binary comes from the target
// options, or from resolveEngineBinary when none was given; the engine's
// stdout and stderr are both forwarded to this process's stderr. Windows is
// rejected up front.
func (t *EmulatorTarget) Start(ctx context.Context) error {
	if runtime.GOOS == "windows" {
		return errors.New("emulator benchmarks require POSIX subprocess support")
	}

	engine := t.opts.EngineBinary
	if engine == "" {
		engine = resolveEngineBinary()
	}

	profile, found := conf.LookupProfile(conf.ProfileDuckDB)
	if !found {
		return fmt.Errorf("profile %q not found", conf.ProfileDuckDB)
	}

	harness := conf.HarnessOptions{
		EngineBinary: engine,
		EngineStdout: os.Stderr,
		EngineStderr: os.Stderr,
	}
	env, err := conf.StartEmulator(ctx, harness, profile)
	if err != nil {
		return err
	}

	t.env = env
	t.client = NewRESTClient(env.BaseURL, "bench")
	return nil
}
// SetupCase runs the case's setup statements, in order, through the gateway
// under the case's own project. Placeholders are expanded with the given
// dataset and the case's project ID. The first failing statement aborts setup.
func (t *EmulatorTarget) SetupCase(ctx context.Context, c Case, dataset string) error {
	statements, _ := c.Substitute(dataset, c.ProjectID)
	projectURL := fmt.Sprintf("%s/bigquery/v2/projects/%s", t.env.BaseURL, c.ProjectID)

	for i := range statements {
		if err := conf.SetupSQLViaGateway(ctx, projectURL, statements[i]); err != nil {
			return err
		}
	}
	return nil
}
// RunQuery posts sql to the emulator under the case's project and measures
// the round trip. A non-positive timeout falls back to defaultTimeoutMS.
//
// Elapsed covers the PostQuery call only and is reported whenever a response
// was received, including non-2xx responses and unparsable bodies. On success
// the rows are hashed and, when the response carries query statistics, the
// emulator's route and phase breakdown are copied into the result.
func (t *EmulatorTarget) RunQuery(ctx context.Context, c Case, sql string, timeout time.Duration) (QueryResult, error) {
	t.client.ProjectID = c.ProjectID
	if timeout <= 0 {
		timeout = time.Duration(defaultTimeoutMS) * time.Millisecond
	}

	attempt := func(ctx context.Context) (QueryResult, error) {
		began := time.Now()
		status, body, err := t.client.PostQuery(ctx, sql)
		if err != nil {
			return QueryResult{Error: err.Error()}, err
		}
		took := time.Since(began)

		if status < 200 || status >= 300 {
			failure := QueryResult{
				Elapsed: took,
				Error:   fmt.Sprintf("HTTP %d: %s", status, snippet(body)),
			}
			return failure, fmt.Errorf("query failed: HTTP %d", status)
		}

		resp, err := ParseQueryResponse(body)
		if err != nil {
			return QueryResult{Elapsed: took, Error: err.Error()}, err
		}

		rows := RESTRowsToMaps(resp.Schema, resp.Rows)
		hash, _ := HashRows(rows)
		result := QueryResult{
			Elapsed:    took,
			Rows:       rows,
			RowCount:   len(rows),
			ResultHash: hash,
		}
		if stats := resp.Statistics; stats != nil && stats.Query != nil {
			result.Route = stats.Query.EmulatorRoute
			result.Phases = stats.Query.EmulatorPhases
		}
		return result, nil
	}

	return timedQuery(ctx, attempt, timeout)
}
// Cleanup shuts down the emulator environment started by Start. It does
// nothing, and returns nil, when Start never ran successfully.
func (t *EmulatorTarget) Cleanup(context.Context) error {
	if t.env == nil {
		return nil
	}
	return t.env.Close()
}
// snippet returns b as a string, shortened to at most 200 bytes followed by
// "..." when b is longer. It is used to keep response bodies in error
// messages readable.
//
// The cut never lands in the middle of a multi-byte UTF-8 character: if the
// 200-byte boundary would split one, the cut moves back to the start of that
// character (at most 3 bytes, the longest possible run of continuation
// bytes), so valid UTF-8 input always yields valid UTF-8 output.
func snippet(b []byte) string {
	const limit = 200
	if len(b) <= limit {
		return string(b)
	}
	cut := limit
	// b[cut] is the first byte that would be dropped. While it is a UTF-8
	// continuation byte (10xxxxxx), the boundary is inside a character.
	for back := 0; back < 3 && cut > 0 && b[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return string(b[:cut]) + "..."
}
// resolveEngineBinary picks the emulator engine binary to launch. The
// BIGQUERY_EMULATOR_BIN environment variable wins when it names an existing
// path; otherwise the first existing candidate under bin/ is used. If nothing
// exists, defaultEngineBinary is returned anyway so that the subsequent launch
// fails with a path the operator can recognize.
func resolveEngineBinary() string {
	exists := func(path string) bool {
		_, err := os.Stat(path) //nolint:gosec // paths are operator-supplied or bench-owned defaults
		return err == nil
	}

	if fromEnv := os.Getenv("BIGQUERY_EMULATOR_BIN"); fromEnv != "" && exists(fromEnv) {
		return fromEnv
	}
	for _, candidate := range [...]string{defaultEngineBinary, filepath.Join("bin", "emulator_main")} {
		if exists(candidate) {
			return candidate
		}
	}
	return defaultEngineBinary
}
// Compile-time check that *EmulatorTarget satisfies Target.
var _ Target = (*EmulatorTarget)(nil)
package runner
import (
"bufio"
"context"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"strings"
"time"
)
// defaultGoccyImage is the pinned goccy/bigquery-emulator image used when no
// image is supplied through the target options.
const defaultGoccyImage = "ghcr.io/goccy/bigquery-emulator:0.8.1"
// goccyProject is the single project the goccy container is started
// with. goccy/bigquery-emulator 404s on any other project id, so every
// case runs under this one (dataset names are unique per case).
const goccyProject = "bench"
// DefaultGoccyImage returns the pinned goccy container reference.
func DefaultGoccyImage() string { return defaultGoccyImage }
// GoccyTarget drives the goccy/bigquery-emulator Docker image.
type GoccyTarget struct {
// opts holds the options handed to NewGoccyTarget.
opts TargetOptions
// container is the name of the running docker container; set by Start.
container string
// hostPort is the local port published to the container's port 9050.
hostPort int
// client posts queries to the container; nil until Start succeeds.
client *RESTClient
// httpClient is the HTTP client shared by the readiness probe and client.
httpClient *http.Client
// logsCancel cancels the context of the "docker logs -f" follower.
logsCancel context.CancelFunc
}
// NewGoccyTarget returns a GoccyTarget that remembers opts. The container is
// only started by Start.
func NewGoccyTarget(opts TargetOptions) *GoccyTarget {
return &GoccyTarget{opts: opts}
}
// Name identifies this target as TargetGoccy.
func (t *GoccyTarget) Name() TargetName { return TargetGoccy }
// Start launches the goccy emulator in a detached, auto-removed docker
// container and waits until it answers queries.
//
// The image comes from the target options, or the pinned default. The
// container's port 9050 is published on a free loopback port, the container
// gets a unique timestamp-based name, and it is started with goccyProject as
// its only project. A log follower is attached right after the container is
// up. If the emulator never becomes ready the container is cleaned up and the
// readiness error is returned.
//
// The REST client is only installed after readiness succeeded, so a nil
// client means "not started"; it is bound to goccyProject, the one project
// the container serves.
func (t *GoccyTarget) Start(ctx context.Context) error {
	image := t.opts.GoccyImage
	if image == "" {
		image = defaultGoccyImage
	}
	port, err := freePort()
	if err != nil {
		return err
	}
	t.hostPort = port
	name := fmt.Sprintf("bq-bench-goccy-%d", time.Now().UnixNano())
	args := []string{
		"run", "--rm", "-d",
		"--name", name,
		"-p", fmt.Sprintf("127.0.0.1:%d:9050", port),
		image,
		"--project=" + goccyProject,
		"--log-level=debug",
	}
	cmd := exec.CommandContext(ctx, "docker", args...) //nolint:gosec // bench operator supplies the image ref
	if out, err := cmd.CombinedOutput(); err != nil {
		return fmt.Errorf("docker run %s: %w: %s", image, err, strings.TrimSpace(string(out)))
	}
	t.container = name
	t.startLogFollower()
	// No client-level timeout: every request carries its own context.
	t.httpClient = &http.Client{Timeout: 0}
	if err := t.waitReady(ctx); err != nil {
		_ = t.Cleanup(ctx)
		return err
	}
	t.client = &RESTClient{
		BaseURL:   fmt.Sprintf("http://127.0.0.1:%d", port),
		ProjectID: goccyProject,
		HTTP:      t.httpClient,
	}
	return nil
}
// waitReady polls the emulator with a trivial "SELECT 1" query every 500ms
// until it responds, and returns nil as soon as any response with a status
// below 500 arrives (the server is up, even if it rejects the request).
//
// Polling stops at ctx's deadline, or after 60 seconds when ctx has none. It
// also stops immediately when ctx is cancelled, instead of continuing to
// issue doomed requests until the deadline. The returned error names the port
// and wraps the reason polling ended: the context error, or the last probe
// failure.
func (t *GoccyTarget) waitReady(ctx context.Context) error {
	deadline, ok := ctx.Deadline()
	if !ok {
		deadline = time.Now().Add(60 * time.Second)
	}
	url := fmt.Sprintf("http://127.0.0.1:%d/bigquery/v2/projects/bench/queries", t.hostPort)
	const body = `{"query":"SELECT 1","useLegacySql":false}`

	var lastErr error
	for time.Now().Before(deadline) {
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, strings.NewReader(body))
		if err != nil {
			return err
		}
		req.Header.Set("Content-Type", "application/json")
		resp, err := t.httpClient.Do(req)
		if err == nil {
			_ = resp.Body.Close()
			if resp.StatusCode >= 200 && resp.StatusCode < 500 {
				return nil
			}
			lastErr = fmt.Errorf("HTTP %d", resp.StatusCode)
		} else {
			lastErr = err
		}

		// Pause between probes, but wake up right away on cancellation.
		timer := time.NewTimer(500 * time.Millisecond)
		select {
		case <-ctx.Done():
			timer.Stop()
			return fmt.Errorf("goccy emulator on port %d not ready: %w", t.hostPort, ctx.Err())
		case <-timer.C:
		}
	}
	if lastErr != nil {
		return fmt.Errorf("goccy emulator on port %d not ready: %w", t.hostPort, lastErr)
	}
	return fmt.Errorf("goccy emulator on port %d not ready", t.hostPort)
}
// EnsureReady makes sure a responsive emulator is available. A target that
// was never started is started; a running one is probed with a ping bounded
// to 5 seconds and restarted if the probe fails.
func (t *GoccyTarget) EnsureReady(ctx context.Context) error {
	if t.client == nil {
		return t.Start(ctx)
	}

	healthy := func() bool {
		pingCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
		defer cancel()
		return t.ping(pingCtx) == nil
	}
	if healthy() {
		return nil
	}
	return t.restart(ctx)
}
// ping runs "SELECT 1" against the emulator and reports whether it answered
// with a 2xx status. Transport errors are returned as-is; any other status
// becomes an error that includes a snippet of the response body.
func (t *GoccyTarget) ping(ctx context.Context) error {
	status, body, err := t.client.PostQuery(ctx, "SELECT 1")
	switch {
	case err != nil:
		return err
	case status < 200 || status >= 300:
		return fmt.Errorf("ping HTTP %d: %s", status, snippet(body))
	}
	return nil
}
// restart tears the target down with Cleanup and starts it again with Start.
// The Cleanup error is deliberately discarded; only the error from Start is
// returned.
func (t *GoccyTarget) restart(ctx context.Context) error {
_ = t.Cleanup(ctx)
return t.Start(ctx)
}
// SetupCase creates the case's dataset on the goccy emulator and then runs
// each setup statement in order. Every statement gets its own timeout taken
// from the case (falling back to the package default). The first transport
// error or non-2xx response aborts setup.
func (t *GoccyTarget) SetupCase(ctx context.Context, c Case, dataset string) error {
	statements, _ := c.Substitute(dataset, goccyProject)
	t.client.ProjectID = goccyProject
	perStatement := c.QueryTimeout(time.Duration(defaultTimeoutMS) * time.Millisecond)
	if err := t.client.CreateDataset(ctx, dataset); err != nil {
		return err
	}

	// runOne executes a single setup statement under its own deadline.
	runOne := func(stmt string) error {
		stmtCtx, done := context.WithTimeout(ctx, perStatement)
		defer done()
		code, payload, err := t.client.PostQuery(stmtCtx, stmt)
		switch {
		case err != nil:
			return err
		case code < 200 || code >= 300:
			return fmt.Errorf("setup sql -> HTTP %d: %s", code, snippet(payload))
		}
		return nil
	}

	for _, stmt := range statements {
		if err := runOne(stmt); err != nil {
			return err
		}
	}
	return nil
}
// RunQuery executes sql once against the goccy emulator and returns the
// decoded rows, their count and a result hash. A non-positive timeout falls
// back to the package default.
//
// For CREATE OR REPLACE TABLE/VIEW ... AS statements, prepareGoccyDDLQuery
// first issues a DROP IF EXISTS. That preamble runs before timedQuery starts
// its clock, under its own timeout, so the measured latency covers only the
// CREATE statement and not the extra round trip.
//
// On failure the returned QueryResult carries the error text in Error and the
// error is also returned.
func (t *GoccyTarget) RunQuery(ctx context.Context, c Case, sql string, timeout time.Duration) (QueryResult, error) {
	t.client.ProjectID = goccyProject
	if timeout <= 0 {
		timeout = time.Duration(defaultTimeoutMS) * time.Millisecond
	}

	// Untimed DDL preamble: keep it out of the measured window.
	prepCtx, cancelPrep := context.WithTimeout(ctx, timeout)
	timedSQL, err := prepareGoccyDDLQuery(prepCtx, t.client, sql)
	cancelPrep()
	if err != nil {
		return QueryResult{Error: err.Error()}, err
	}

	return timedQuery(ctx, func(ctx context.Context) (QueryResult, error) {
		status, body, err := t.client.PostQuery(ctx, timedSQL)
		if err != nil {
			return QueryResult{Error: err.Error()}, err
		}
		if status < 200 || status >= 300 {
			return QueryResult{Error: fmt.Sprintf("HTTP %d: %s", status, snippet(body))},
				fmt.Errorf("query failed: HTTP %d", status)
		}
		resp, err := ParseQueryResponse(body)
		if err != nil {
			return QueryResult{Error: err.Error()}, err
		}
		rows := RESTRowsToMaps(resp.Schema, resp.Rows)
		// A hashing failure leaves ResultHash empty rather than failing the query.
		hash, _ := HashRows(rows)
		return QueryResult{
			Rows:       rows,
			RowCount:   len(rows),
			ResultHash: hash,
		}, nil
	}, timeout)
}
// startLogFollower spawns a goroutine that runs `docker logs -f` for the
// current container and copies its stdout to this process's stderr, one line
// at a time, prefixed with "[goccy] ". It does nothing when no container is
// recorded. The follower is stopped by calling t.logsCancel, which Cleanup
// does.
//
// The container name is captured before the goroutine starts so the
// goroutine never reads t.container while Cleanup clears it.
func (t *GoccyTarget) startLogFollower() {
	container := t.container
	if container == "" {
		return
	}
	logCtx, cancel := context.WithCancel(context.Background())
	t.logsCancel = cancel
	go func() {
		// #nosec G204 -- container name is bench-owned.
		cmd := exec.CommandContext(
			logCtx,
			"docker",
			"logs",
			"-f",
			container,
		)
		stdout, err := cmd.StdoutPipe()
		if err != nil {
			return
		}
		if err := cmd.Start(); err != nil {
			return
		}
		defer func() { _ = stdout.Close() }()
		streamPrefixedLines(stdout, "[goccy] ")
		_ = cmd.Wait()
	}()
}
// streamPrefixedLines copies r to os.Stderr line by line, writing prefix in
// front of every line. It returns when r is exhausted or a read fails.
//
// The scanner's line limit is raised from bufio's 64 KiB default to 1 MiB:
// with the default, a single long log line makes Scan stop for good and every
// later line is silently dropped.
func streamPrefixedLines(r io.Reader, prefix string) {
	const maxLineBytes = 1024 * 1024
	sc := bufio.NewScanner(r)
	sc.Buffer(make([]byte, 0, 64*1024), maxLineBytes)
	for sc.Scan() {
		_, _ = fmt.Fprintf(os.Stderr, "%s%s\n", prefix, sc.Text())
	}
}
// Cleanup stops the log follower and force-removes the emulator container.
// It is best effort: a failing `docker rm` is ignored and nil is always
// returned. Calling it with no container recorded is a no-op.
//
// The removal deliberately does not inherit ctx's cancellation or deadline.
// Cleanup is typically reached after ctx has expired or been cancelled, and
// a command bound to such a ctx would be killed before removing the
// container, leaking it. The command gets its own 30 second budget instead.
func (t *GoccyTarget) Cleanup(ctx context.Context) error {
	if t.logsCancel != nil {
		t.logsCancel()
		t.logsCancel = nil
	}
	if t.container == "" {
		return nil
	}
	rmCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
	defer cancel()
	cmd := exec.CommandContext(rmCtx, "docker", "rm", "-f", t.container) //nolint:gosec // container name is bench-owned
	_ = cmd.Run()
	t.container = ""
	return nil
}
// ImageTag extracts the tag from a full docker image reference, i.e. the text
// after the last ':'. When the reference carries no tag the whole reference
// is returned unchanged. A ':' that belongs to a registry host:port (the text
// after it still contains a '/', as in "localhost:5000/emulator") is not
// treated as a tag separator.
func ImageTag(image string) string {
	i := strings.LastIndex(image, ":")
	if i < 0 || strings.Contains(image[i+1:], "/") {
		return image
	}
	return image[i+1:]
}
// Compile-time assertion that *GoccyTarget implements the Target interface.
var _ Target = (*GoccyTarget)(nil)
package runner
import (
"context"
"fmt"
"regexp"
"strings"
)
// createOrReplaceAsRE matches the head of CREATE OR REPLACE TABLE/VIEW ... AS
// (CTAS / view body), case-insensitively and anchored at the start of the
// statement. Capture 1 is the object kind, capture 2 the object name.
// Bench DDL cases use this shape; goccy 0.8.1 treats a second CREATE as duplicate
// rather than honoring OR REPLACE, so we DROP IF EXISTS then CREATE before timing.
var createOrReplaceAsRE = regexp.MustCompile(
	`(?is)^CREATE\s+OR\s+REPLACE\s+(TABLE|VIEW)\s+(\S+)\s+AS\s+`,
)

// rewriteGoccyCreateOrReplace splits CREATE OR REPLACE TABLE/VIEW ... AS into an
// idempotent DROP + CREATE pair for goccy. ok is false when sql is not that
// shape; in that case dropSQL is empty and createSQL is the trimmed input.
//
// The CREATE statement is assembled by plain concatenation with the text that
// follows the matched head. Using the regexp replacement API here would
// interpret any '$' in the object name as a capture-group reference and
// corrupt the statement.
func rewriteGoccyCreateOrReplace(sql string) (dropSQL, createSQL string, ok bool) {
	trimmed := strings.TrimSpace(sql)
	m := createOrReplaceAsRE.FindStringSubmatch(trimmed)
	if m == nil {
		return "", trimmed, false
	}
	kind := strings.ToUpper(m[1])
	object := m[2]
	dropSQL = fmt.Sprintf("DROP %s IF EXISTS %s", kind, object)
	// The pattern is anchored at ^, so m[0] is always a prefix of trimmed.
	createSQL = "CREATE " + kind + " " + object + " AS " + trimmed[len(m[0]):]
	return dropSQL, createSQL, true
}
// prepareGoccyDDLQuery returns the statement the caller should execute for
// sql. Statements that are not CREATE OR REPLACE TABLE/VIEW ... AS come back
// trimmed and otherwise untouched. For that shape it first posts the matching
// DROP ... IF EXISTS through client and, once that succeeds, returns the plain
// CREATE ... AS form. A transport error or non-2xx response from the DROP is
// returned as an error.
func prepareGoccyDDLQuery(ctx context.Context, client *RESTClient, sql string) (string, error) {
	drop, create, matched := rewriteGoccyCreateOrReplace(sql)
	if !matched {
		return strings.TrimSpace(sql), nil
	}
	code, payload, err := client.PostQuery(ctx, drop)
	switch {
	case err != nil:
		return "", fmt.Errorf("goccy ddl preamble drop: %w", err)
	case code < 200 || code >= 300:
		return "", fmt.Errorf("goccy ddl preamble drop -> HTTP %d: %s", code, snippet(payload))
	}
	return create, nil
}
package runner
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"maps"
"sort"
"strconv"
)
// NormalizeRows returns a deterministic JSON encoding for hashing: a JSON
// array of the rows, ordered by each row's own JSON encoding. The input slice
// and its maps are not modified; rows are copied first, so a nil row encodes
// as an empty object.
//
// Each row is marshalled once to obtain its sort key. Marshalling inside the
// sort comparator would repeat that work on every comparison.
func NormalizeRows(rows []map[string]string) ([]byte, error) {
	type keyedRow struct {
		key string
		row map[string]string
	}
	keyed := make([]keyedRow, len(rows))
	for i, r := range rows {
		cp := make(map[string]string, len(r))
		maps.Copy(cp, r)
		enc, err := json.Marshal(cp)
		if err != nil {
			return nil, err
		}
		keyed[i] = keyedRow{key: string(enc), row: cp}
	}
	// Rows with equal keys are identical, so sort stability is irrelevant.
	sort.Slice(keyed, func(i, j int) bool { return keyed[i].key < keyed[j].key })
	sorted := make([]map[string]string, len(keyed))
	for i := range keyed {
		sorted[i] = keyed[i].row
	}
	return json.Marshal(sorted)
}
// HashRows returns the lowercase hex SHA-256 digest of the rows' normalized
// JSON encoding (see NormalizeRows), or the normalization error.
func HashRows(rows []map[string]string) (string, error) {
	canonical, err := NormalizeRows(rows)
	if err != nil {
		return "", err
	}
	hasher := sha256.New()
	_, _ = hasher.Write(canonical) // hash.Hash.Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil)), nil
}
// RowsFromBQ converts rows of arbitrary decoded values into the string-map
// form used for hashing, rendering every cell with cellToString. The result
// always has one entry per input row.
func RowsFromBQ(rows []map[string]any) []map[string]string {
	converted := make([]map[string]string, len(rows))
	for i, row := range rows {
		cells := make(map[string]string, len(row))
		for column, value := range row {
			cells[column] = cellToString(value)
		}
		converted[i] = cells
	}
	return converted
}
// cellToString renders one decoded cell as text: nil becomes "", strings pass
// through, float64 goes through jsonNumber, booleans become "true"/"false",
// and anything else is rendered as its JSON encoding (marshal errors are
// ignored and yield an empty string).
func cellToString(v any) string {
	switch cell := v.(type) {
	case string:
		return cell
	case bool:
		return strconv.FormatBool(cell)
	case float64:
		return jsonNumber(cell)
	case nil:
		return ""
	}
	encoded, _ := json.Marshal(v)
	return string(encoded)
}
func jsonNumber(f float64) string {
if f == float64(int64(f)) {
return strconv.FormatInt(int64(f), 10)
}
return strconv.FormatFloat(f, 'f', -1, 64)
}
package runner
import "time"
// minRatioMS is the floor, in milliseconds, substituted for non-positive
// latencies in ratio math (see LatencyP50ForRatio and
// CompareLatencyMSForRatio).
const minRatioMS int64 = 1
// LatencyP50MS returns the baseline's primary latency in milliseconds: the
// execution p50 when it is positive, otherwise the total p50 (the fallback
// for legacy baselines that only recorded client wall-clock time).
func (b BaselineCase) LatencyP50MS() int64 {
	if exec := b.ExecutionP50MS; exec > 0 {
		return exec
	}
	return b.TotalP50MS
}
// LatencyP50ForRatio returns LatencyP50MS clamped to at least minRatioMS so
// it is always safe to use as a ratio denominator.
func (b BaselineCase) LatencyP50ForRatio() int64 {
	return max(b.LatencyP50MS(), minRatioMS)
}
// CompareLatencyP50 returns the latency used when comparing this result to a
// baseline: EngineP50 when it is positive, otherwise the p50 of the measured
// per-iteration latency.
func (r CaseResult) CompareLatencyP50() time.Duration {
	if engine := r.EngineP50; engine > 0 {
		return engine
	}
	return r.Latency.P50
}
// CompareLatencyMSForRatio returns CompareLatencyP50 in whole milliseconds,
// clamped to at least minRatioMS.
func (r CaseResult) CompareLatencyMSForRatio() int64 {
	return max(r.CompareLatencyP50().Milliseconds(), minRatioMS)
}
// EngineP50FromPhases returns the p50 of the "total_engine" phase, or 0 when
// phases is nil or has no such entry.
func EngineP50FromPhases(phases PhaseStats) time.Duration {
	// Indexing a nil or incomplete map yields the zero LatencyStats, whose
	// P50 is 0, so no explicit presence check is needed.
	return phases["total_engine"].P50
}
package runner
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// RESTClient issues jobs.query calls against a BigQuery REST emulator.
type RESTClient struct {
// BaseURL is prepended verbatim to every request path ("/bigquery/v2/...").
BaseURL string
// ProjectID is interpolated into request paths and dataset references.
ProjectID string
// HTTP is the client used for every request; it must be non-nil.
HTTP *http.Client
}
// NewRESTClient returns a RESTClient for baseURL and projectID backed by a
// fresh http.Client with no client-level timeout; callers bound requests
// through the context they pass to each call.
func NewRESTClient(baseURL, projectID string) *RESTClient {
	client := new(RESTClient)
	client.BaseURL = baseURL
	client.ProjectID = projectID
	client.HTTP = &http.Client{} // zero Timeout means "no timeout"
	return client
}
// CreateDataset posts a datasets.insert request for datasetID under the
// client's project. The response body is read in full before the status is
// inspected. HTTP 409 (already exists) counts as success so repeated case
// setup is idempotent; any other non-2xx status is returned as an error that
// includes a snippet of the body.
func (c *RESTClient) CreateDataset(ctx context.Context, datasetID string) error {
	reference := map[string]string{
		"datasetId": datasetID,
		"projectId": c.ProjectID,
	}
	payload, err := json.Marshal(map[string]any{"datasetReference": reference})
	if err != nil {
		return err
	}

	endpoint := fmt.Sprintf("%s/bigquery/v2/projects/%s/datasets", c.BaseURL, c.ProjectID)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.HTTP.Do(req)
	if err != nil {
		return err
	}
	defer func() { _ = resp.Body.Close() }()

	reply, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	switch code := resp.StatusCode; {
	case code == http.StatusConflict:
		return nil
	case code < 200 || code >= 300:
		return fmt.Errorf("create dataset %s -> HTTP %d: %s", datasetID, code, snippet(reply))
	default:
		return nil
	}
}
// PostQuery posts sql as a standard-SQL jobs.query request under the client's
// project and returns the HTTP status code and the full response body. It
// does not interpret the status; callers decide what counts as failure. err
// is non-nil when the request could not be built or sent, or when reading the
// body failed (in which case the status and partial body are still returned).
func (c *RESTClient) PostQuery(ctx context.Context, sql string) (int, []byte, error) {
	payload, err := json.Marshal(map[string]any{
		"query":        sql,
		"useLegacySql": false,
	})
	if err != nil {
		return 0, nil, err
	}

	endpoint := fmt.Sprintf("%s/bigquery/v2/projects/%s/queries", c.BaseURL, c.ProjectID)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
	if err != nil {
		return 0, nil, err
	}
	req.Header.Set("Content-Type", "application/json")

	resp, err := c.HTTP.Do(req)
	if err != nil {
		return 0, nil, err
	}
	defer func() { _ = resp.Body.Close() }()

	reply, readErr := io.ReadAll(resp.Body)
	return resp.StatusCode, reply, readErr
}
// ParseQueryResponse decodes a jobs.query response body. On a decode error
// the error is returned together with whatever was decoded so far.
func ParseQueryResponse(body []byte) (bqtypes.QueryResponse, error) {
	var parsed bqtypes.QueryResponse
	err := json.Unmarshal(body, &parsed)
	return parsed, err
}
// RESTRowsToMaps converts REST f/v rows into one string map per row, keyed by
// the schema's field names in positional order. Cells are rendered with
// cellToString. A row with fewer cells than the schema simply lacks the
// trailing keys; extra cells are ignored. A nil schema yields nil.
func RESTRowsToMaps(schema *bqtypes.TableSchema, rows []bqtypes.Row) []map[string]string {
	if schema == nil {
		return nil
	}
	columns := make([]string, 0, len(schema.Fields))
	for _, field := range schema.Fields {
		columns = append(columns, field.Name)
	}
	converted := make([]map[string]string, 0, len(rows))
	for _, row := range rows {
		cells := make(map[string]string, len(columns))
		usable := min(len(columns), len(row.F))
		for i := 0; i < usable; i++ {
			cells[columns[i]] = cellToString(row.F[i].V)
		}
		converted = append(converted, cells)
	}
	return converted
}
// timedQuery runs fn under a context limited to timeout and stamps the
// wall-clock duration of the call into the result's Elapsed field, whether or
// not fn failed. fn's result and error are otherwise returned unchanged.
func timedQuery(
	ctx context.Context,
	fn func(context.Context) (QueryResult, error),
	timeout time.Duration,
) (QueryResult, error) {
	limited, release := context.WithTimeout(ctx, timeout)
	defer release()

	began := time.Now()
	result, err := fn(limited)
	result.Elapsed = time.Since(began)
	return result, err
}
package runner
import (
"encoding/json"
"fmt"
"io"
"os"
"slices"
"strings"
"time"
)
// CaseResult is the aggregated outcome for one (case, target) pair.
type CaseResult struct {
// CaseName and Target identify the row; MergeReport keys on the pair.
CaseName string `json:"case_name"`
Target TargetName `json:"target"`
// ContentHash is copied from the Case that produced this result.
ContentHash string `json:"content_hash,omitempty"`
// Outcome classifies the run; Error carries the skip reason or failure text.
Outcome Outcome `json:"outcome"`
Error string `json:"error,omitempty"`
// Latency summarizes the per-iteration elapsed times after warmup.
Latency LatencyStats `json:"latency"`
// EngineP50 is the p50 of the "total_engine" phase (see EngineP50FromPhases).
EngineP50 time.Duration `json:"engine_p50,omitempty"`
// ExecutionP50, QueueP50 and TotalSlotMsP50 are p50s of the per-iteration
// execution, queue and slot-ms samples; they stay zero when the target
// reported no such samples.
ExecutionP50 time.Duration `json:"execution_p50,omitempty"`
QueueP50 time.Duration `json:"queue_p50,omitempty"`
TotalSlotMsP50 int64 `json:"total_slot_ms_p50,omitempty"`
// BytesProcessed, Route, ResultHash and RowCount are taken from the last
// iteration that ran.
BytesProcessed int64 `json:"bytes_processed,omitempty"`
Route string `json:"route,omitempty"`
// Phases holds per-phase latency statistics across iterations.
Phases PhaseStats `json:"phases,omitempty"`
ResultHash string `json:"result_hash,omitempty"`
RowCount int `json:"row_count,omitempty"`
// Pass, CompareReason, BQExecutionP50MS and Ratio are filled in by
// enrichWithBaseline when baseline comparison is enabled. Pass is nil when
// no comparison was made. Ratio is emulator latency divided by baseline
// latency, both in milliseconds with a 1ms floor.
Pass *bool `json:"pass,omitempty"`
CompareReason string `json:"compare_reason,omitempty"`
BQExecutionP50MS int64 `json:"bq_execution_p50_ms,omitempty"`
Ratio float64 `json:"ratio_vs_bq,omitempty"`
}
// RunReport is the machine-readable benchmark output.
type RunReport struct {
// Timestamp is when the run started; SaveReport normalizes it to UTC.
Timestamp time.Time `json:"timestamp"`
// CommitSHA is read from the GITHUB_SHA environment variable by Run.
CommitSHA string `json:"commit_sha,omitempty"`
// Host is the machine's hostname, or empty when it could not be determined.
Host string `json:"host,omitempty"`
// GoccyImage, when set, is printed by PrintTextReport as the goccy image.
GoccyImage string `json:"goccy_image,omitempty"`
// Targets lists the targets of the run, in the order they were configured.
Targets []TargetName `json:"targets"`
// Results holds one entry per (case, target) pair that was considered.
Results []CaseResult `json:"results"`
}
// SaveReport writes r to path as indented JSON followed by a newline, with
// the timestamp converted to UTC. The file is created with mode 0644.
func SaveReport(path string, r RunReport) error {
	r.Timestamp = r.Timestamp.UTC()
	encoded, err := json.MarshalIndent(r, "", " ")
	if err != nil {
		return err
	}
	encoded = append(encoded, '\n')
	return os.WriteFile(path, encoded, 0o644) //nolint:gosec // 0o644 is fine for benchmark output JSON
}
// MergeReport overlays a fresh run onto an existing report. Rows of existing
// whose (case_name, target) key also appears in fresh are dropped and the
// fresh rows take their place; every other existing row is preserved. This
// lets a partial rerun (e.g. --case create_view_100k) update only the cases
// that ran instead of discarding the rest of bench/results.json.
//
// The timestamp always comes from fresh. CommitSHA, Host, GoccyImage and
// Targets come from fresh only when fresh sets them. The merged rows are
// sorted by case name, then by target.
func MergeReport(existing, fresh RunReport) RunReport {
	merged := existing
	merged.Timestamp = fresh.Timestamp
	if sha := fresh.CommitSHA; sha != "" {
		merged.CommitSHA = sha
	}
	if host := fresh.Host; host != "" {
		merged.Host = host
	}
	if image := fresh.GoccyImage; image != "" {
		merged.GoccyImage = image
	}
	if len(fresh.Targets) > 0 {
		merged.Targets = fresh.Targets
	}

	// Keys of rows the fresh run supersedes.
	superseded := make(map[string]struct{}, len(fresh.Results))
	for i := range fresh.Results {
		superseded[resultKey(fresh.Results[i])] = struct{}{}
	}

	rows := make([]CaseResult, 0, len(existing.Results))
	for _, prior := range existing.Results {
		if _, stale := superseded[resultKey(prior)]; !stale {
			rows = append(rows, prior)
		}
	}
	rows = append(rows, fresh.Results...)

	slices.SortFunc(rows, func(a, b CaseResult) int {
		if byCase := strings.Compare(a.CaseName, b.CaseName); byCase != 0 {
			return byCase
		}
		return strings.Compare(string(a.Target), string(b.Target))
	})
	merged.Results = rows
	return merged
}
// resultKey returns the identity of a result row: its case name and target
// joined by a NUL byte.
func resultKey(r CaseResult) string {
	return strings.Join([]string{r.CaseName, string(r.Target)}, "\x00")
}
// LoadReport reads and decodes the results JSON file at path. A read or
// decode failure returns the error together with a zero RunReport.
func LoadReport(path string) (RunReport, error) {
	var report RunReport
	data, err := os.ReadFile(path) //nolint:gosec // report path is CLI-controlled
	if err == nil {
		err = json.Unmarshal(data, &report)
	}
	if err != nil {
		return RunReport{}, err
	}
	return report, nil
}
// PrintTextReport writes a human-readable summary of report to w: a header
// with the timestamp (plus commit and goccy image when set), then one table
// row per result. The notes column shows the comparison reason, or the error
// text when there is no reason. When baseline is non-nil a final line counts
// how many emulator results passed the baseline comparison. Write errors are
// ignored.
func PrintTextReport(w io.Writer, report RunReport, baseline *BaselineFile) {
	emit := func(format string, args ...any) {
		_, _ = fmt.Fprintf(w, format, args...)
	}

	emit("benchmark report @ %s\n", report.Timestamp.Format(time.RFC3339))
	if report.CommitSHA != "" {
		emit("commit: %s\n", report.CommitSHA)
	}
	if report.GoccyImage != "" {
		emit("goccy image: %s\n", report.GoccyImage)
	}

	emit("\n%-24s %-10s %-8s %-10s %-12s %-8s %s\n",
		"case", "target", "outcome", "p50", "route", "rows", "notes")
	for i := range report.Results {
		row := &report.Results[i]
		notes := row.CompareReason
		if notes == "" {
			notes = row.Error
		}
		emit("%-24s %-10s %-8s %-10s %-12s %-8d %s\n",
			row.CaseName, row.Target, row.Outcome, row.Latency.P50, row.Route, row.RowCount, notes)
	}

	if baseline == nil {
		return
	}
	passed, total := 0, 0
	for i := range report.Results {
		row := &report.Results[i]
		if row.Target != TargetEmulator {
			continue
		}
		total++
		if row.Pass != nil && *row.Pass {
			passed++
		}
	}
	emit("\nemulator vs baseline: %d/%d passed\n", passed, total)
}
package runner
import (
"context"
"errors"
"fmt"
"os"
"strings"
"time"
)
// RunOptions configures a benchmark execution.
type RunOptions struct {
// CasesDir is the directory Run loads benchmark cases from.
CasesDir string
// CaseFilter, when non-empty, restricts the run to the case with exactly
// this name; Run fails if no case matches.
CaseFilter string
// Targets are started in order before any case runs and cleaned up when
// Run returns.
Targets []Target
// Timeout is the default per-query timeout; values <= 0 fall back to the
// package default (defaultTimeoutMS).
Timeout time.Duration
// Baseline, when non-nil, supplies expected result hashes and latencies.
Baseline *BaselineFile
// Compare enables pass/fail comparison of emulator results against Baseline.
Compare bool
// Progress receives human-readable progress lines as the run
// advances (target startup, per-case setup, per-iteration
// completions). nil disables progress output.
Progress func(format string, args ...any)
}
// logf forwards a progress line to the Progress callback; it is a no-op when
// no callback is configured.
func (o RunOptions) logf(format string, args ...any) {
	if o.Progress == nil {
		return
	}
	o.Progress(format, args...)
}
// Run loads the cases from opts.CasesDir (narrowed by opts.CaseFilter),
// starts every configured target, and then runs each case against each
// target, collecting the results into a RunReport.
//
// A failure to load or filter cases returns an empty report. A target that
// fails to start aborts the run with the report built so far. Targets that
// did start are cleaned up when Run returns, on every path.
func Run(ctx context.Context, opts RunOptions) (RunReport, error) {
	loaded, err := LoadCases(opts.CasesDir)
	if err != nil {
		return RunReport{}, err
	}
	selected, err := filterCases(loaded, opts.CaseFilter)
	if err != nil {
		return RunReport{}, err
	}

	perQuery := opts.Timeout
	if perQuery <= 0 {
		perQuery = time.Duration(defaultTimeoutMS) * time.Millisecond
	}

	report := RunReport{
		Timestamp: time.Now().UTC(),
		CommitSHA: os.Getenv("GITHUB_SHA"),
		Host:      hostname(),
		Targets:   targetNames(opts.Targets),
	}

	for _, target := range opts.Targets {
		opts.logf("starting target %s...", target.Name())
		began := time.Now()
		if err := target.Start(ctx); err != nil {
			return report, fmt.Errorf("start %s: %w", target.Name(), err)
		}
		opts.logf("target %s ready in %s", target.Name(), time.Since(began).Round(time.Millisecond))
		// Deferred on purpose: targets stay up for the whole run.
		defer func(started Target) { _ = started.Cleanup(ctx) }(target)
	}

	for idx := range selected {
		runCaseAcrossTargets(ctx, opts, &report, selected, idx, selected[idx], perQuery)
	}
	return report, nil
}
// runCaseAcrossTargets runs case c (index ci within cases) on every target
// in order and appends one CaseResult per target to report. Targets for which
// prepareCaseRun declines the run contribute the result it produced. After a
// goccy run ends in an error, the goccy target is given a chance to recover
// before the next case.
func runCaseAcrossTargets(
	ctx context.Context,
	opts RunOptions,
	report *RunReport,
	cases []Case,
	ci int,
	c Case,
	timeout time.Duration,
) {
	total := len(cases)
	dataset := datasetForCase(c.Name)
	for _, target := range opts.Targets {
		result, shouldRun := prepareCaseRun(ctx, opts, ci, total, c, target)
		if shouldRun {
			opts.logf("[%d/%d] %s on %s: setup...", ci+1, total, c.Name, target.Name())
			result = runCase(ctx, opts, target, c, dataset, timeout)
			if goccy, isGoccy := target.(*GoccyTarget); isGoccy && result.Outcome == OutcomeError {
				// Best effort: a failed recovery surfaces on the next case.
				_ = goccy.EnsureReady(ctx)
			}
			logCaseResult(opts, ci+1, total, c, target, result)
			result = enrichWithBaseline(opts, target, c, result)
		}
		report.Results = append(report.Results, result)
	}
}
// prepareCaseRun decides whether case c should run on target. It returns
// (zero CaseResult, true) when the case should run. It returns a finished,
// already-logged CaseResult and false when the case is marked as skipped for
// this target, or when target is the goccy emulator and it could not be made
// ready; the caller then records that result without running anything.
func prepareCaseRun(
	ctx context.Context,
	opts RunOptions,
	index, total int,
	c Case,
	target Target,
) (CaseResult, bool) {
	// decline builds, logs and returns the result for a case that will not run.
	decline := func(outcome Outcome, message string) (CaseResult, bool) {
		declined := CaseResult{
			CaseName:    c.Name,
			Target:      target.Name(),
			ContentHash: c.ContentHash,
			Outcome:     outcome,
			Error:       message,
		}
		logCaseResult(opts, index+1, total, c, target, declined)
		return declined, false
	}

	if skipped, reason := c.SkippedFor(target.Name()); skipped {
		return decline(OutcomeSkipped, reason)
	}
	goccy, isGoccy := target.(*GoccyTarget)
	if !isGoccy {
		return CaseResult{}, true
	}
	if err := goccy.EnsureReady(ctx); err != nil {
		return decline(OutcomeError, fmt.Sprintf("goccy not ready: %v", err))
	}
	return CaseResult{}, true
}
// filterCases returns the cases whose Name equals name, or all cases when
// name is empty. It is an error for a non-empty name to match nothing. The
// filtering happens in place: the returned slice reuses the backing array of
// cases, so the caller must not keep using the original slice afterwards.
func filterCases(cases []Case, name string) ([]Case, error) {
	if name == "" {
		return cases, nil
	}
	kept := cases[:0]
	for i := range cases {
		if cases[i].Name == name {
			kept = append(kept, cases[i])
		}
	}
	if len(kept) > 0 {
		return kept, nil
	}
	return nil, fmt.Errorf("case %q not found", name)
}
// logCaseResult emits one progress line summarizing cr: p50 and row count for
// a successful run, the reason for a skipped one, and the outcome with its
// error text for everything else.
func logCaseResult(opts RunOptions, index, total int, c Case, target Target, cr CaseResult) {
	if cr.Outcome == OutcomeOK {
		opts.logf("[%d/%d] %s on %s: done (p50 %s, %d rows)",
			index, total, c.Name, target.Name(),
			cr.Latency.P50.Round(time.Millisecond), cr.RowCount)
		return
	}
	if cr.Outcome == OutcomeSkipped {
		opts.logf("[%d/%d] %s on %s: skipped (%s)",
			index, total, c.Name, target.Name(), cr.Error)
		return
	}
	opts.logf("[%d/%d] %s on %s: %s (%s)",
		index, total, c.Name, target.Name(), cr.Outcome, cr.Error)
}
// enrichWithBaseline annotates cr with baseline information and returns it.
//
// When comparison is enabled and target is the emulator, the result gets a
// pass/fail verdict: from CompareToBaseline (plus the baseline latency and
// the emulator/baseline latency ratio) when the baseline knows the case, or
// a failure with reason "no baseline for case" otherwise.
//
// Independently of comparison, any successful result whose hash differs from
// a non-empty baseline hash is downgraded to OutcomeWrongResult; an emulator
// result that already carries a verdict is additionally marked as failed.
// Without a baseline cr is returned untouched.
func enrichWithBaseline(opts RunOptions, target Target, c Case, cr CaseResult) CaseResult {
	if opts.Baseline == nil {
		return cr
	}
	base, known := opts.Baseline.Cases[c.Name]

	if opts.Compare && target.Name() == TargetEmulator {
		if known {
			verdict, reason := CompareToBaseline(c, base, cr)
			cr.Pass = &verdict
			cr.CompareReason = reason
			cr.BQExecutionP50MS = base.LatencyP50MS()
			denominator := base.LatencyP50ForRatio()
			numerator := cr.CompareLatencyMSForRatio()
			if denominator > 0 && numerator > 0 {
				cr.Ratio = float64(numerator) / float64(denominator)
			}
		} else {
			verdict := false
			cr.Pass = &verdict
			cr.CompareReason = "no baseline for case"
		}
	}

	if !known || cr.Outcome != OutcomeOK || cr.ResultHash == "" {
		return cr
	}
	if base.ResultHash == "" || base.ResultHash == cr.ResultHash {
		return cr
	}
	cr.Outcome = OutcomeWrongResult
	if target.Name() == TargetEmulator && cr.Pass != nil {
		verdict := false
		cr.Pass = &verdict
		cr.CompareReason = "result hash mismatch vs baseline"
	}
	return cr
}
// runCase sets up and executes one case on one target and aggregates the
// iterations into a CaseResult. The project is the case's own unless the
// target is BigQuery (the target's project) or goccy (the fixed goccy
// project). A setup failure yields an OutcomeError result with no latency
// data. Otherwise the result carries latency and phase statistics computed
// after the case's warmup, the last iteration's route, hash, row count and
// bytes processed, and p50s of execution, queue and slot-ms samples when any
// were collected.
func runCase(
	ctx context.Context,
	opts RunOptions,
	target Target,
	c Case,
	dataset string,
	timeout time.Duration,
) CaseResult {
	project := c.ProjectID
	if bq, isBQ := target.(*BigQueryTarget); isBQ {
		project = bq.ProjectID()
	} else if _, isGoccy := target.(*GoccyTarget); isGoccy {
		project = goccyProject
	}
	dsRef := datasetRef(target.Name(), project, dataset)

	setupStart := time.Now()
	if err := target.SetupCase(ctx, c, dsRef); err != nil {
		return CaseResult{
			CaseName:    c.Name,
			Target:      target.Name(),
			ContentHash: c.ContentHash,
			Outcome:     OutcomeError,
			Error:       err.Error(),
		}
	}
	opts.logf(" %s on %s: setup done in %s, running %d iterations...",
		c.Name, target.Name(), time.Since(setupStart).Round(time.Millisecond), c.Iterations)

	_, query := c.Substitute(dsRef, project)
	perQuery := c.QueryTimeout(timeout)
	samples, execSamples, queueSamples, slotSamples, phaseIters, last, outcome, lastErr := runQueryIterations(
		ctx, opts, target, c, query, perQuery)

	phases := ComputePhaseStats(phaseIters, c.Warmup)
	result := CaseResult{
		CaseName:    c.Name,
		Target:      target.Name(),
		ContentHash: c.ContentHash,
		Outcome:     outcome,
		Error:       lastErr,
		Latency:     ComputeLatencyStats(samples, c.Warmup),
		Phases:      phases,
		EngineP50:   EngineP50FromPhases(phases),
	}
	// Descriptive fields come from the last iteration that ran.
	result.Route = last.Route
	result.ResultHash = last.ResultHash
	result.RowCount = last.RowCount
	result.BytesProcessed = last.BytesProcessed

	if len(execSamples) > 0 {
		result.ExecutionP50 = ComputeLatencyStats(execSamples, c.Warmup).P50
	}
	if len(queueSamples) > 0 {
		result.QueueP50 = ComputeLatencyStats(queueSamples, c.Warmup).P50
	}
	if len(slotSamples) > 0 {
		result.TotalSlotMsP50 = ComputeInt64P50(slotSamples, c.Warmup)
	}
	return result
}
// runQueryIterations runs query c.Iterations times on target and collects the
// per-iteration measurements. It stops at the first failing iteration:
// a deadline-exceeded error (from the query or from ctx) yields
// OutcomeTimeout with lastErr "timeout"; any other error yields OutcomeError
// with the result's error text, or the error's own text when that is empty.
// last is always the result of the final iteration attempted. Execution and
// slot samples are recorded only for iterations that report valid execution
// stats, queue samples only when positive, phase maps only when non-empty.
func runQueryIterations(
	ctx context.Context,
	opts RunOptions,
	target Target,
	c Case,
	query string,
	timeout time.Duration,
) (
	samples, execSamples, queueSamples []time.Duration,
	slotSamples []int64,
	phaseIters []map[string]int64,
	last QueryResult,
	outcome Outcome,
	lastErr string,
) {
	outcome = OutcomeOK
	for i := 0; i < c.Iterations; i++ {
		res, err := target.RunQuery(ctx, c, query, timeout)
		last = res
		if err != nil {
			timedOut := errors.Is(err, context.DeadlineExceeded) || ctx.Err() == context.DeadlineExceeded
			switch {
			case timedOut:
				outcome, lastErr = OutcomeTimeout, "timeout"
			case res.Error != "":
				outcome, lastErr = OutcomeError, res.Error
			default:
				outcome, lastErr = OutcomeError, err.Error()
			}
			break
		}

		logQueryIteration(opts, c, target, i, res)
		samples = append(samples, res.Elapsed)
		if res.ExecutionValid {
			execSamples = append(execSamples, res.ExecutionOnly)
			slotSamples = append(slotSamples, res.SlotMs)
		}
		if res.QueueOnly > 0 {
			queueSamples = append(queueSamples, res.QueueOnly)
		}
		if len(res.Phases) > 0 {
			phaseIters = append(phaseIters, res.Phases)
		}
	}
	return samples, execSamples, queueSamples, slotSamples, phaseIters, last, outcome, lastErr
}
// logQueryIteration emits a progress line for iteration i (zero-based) with
// its elapsed time, tagging iterations inside the case's warmup window. For
// BigQuery results with valid execution stats it adds a second line with the
// execution, queue, slot-ms and client-overhead (elapsed minus execution)
// figures.
func logQueryIteration(opts RunOptions, c Case, target Target, i int, res QueryResult) {
	var suffix string
	if i < c.Warmup {
		suffix = " (warmup)"
	}
	opts.logf(" %s on %s: iteration %d/%d%s took %s",
		c.Name, target.Name(), i+1, c.Iterations, suffix,
		res.Elapsed.Round(time.Millisecond))

	if target.Name() != TargetBigQuery || !res.ExecutionValid {
		return
	}
	overhead := res.Elapsed - res.ExecutionOnly
	opts.logf(" bq stats: execution=%s queue=%s slot_ms=%d client_overhead=%s",
		res.ExecutionOnly.Round(time.Millisecond),
		res.QueueOnly.Round(time.Millisecond),
		res.SlotMs,
		overhead.Round(time.Millisecond))
}
// datasetForCase derives the dataset name for a case: "ds_" followed by the
// case name with every '-' replaced by '_'.
func datasetForCase(name string) string {
	sanitized := strings.Replace(name, "-", "_", -1)
	return "ds_" + sanitized
}
// datasetRef returns how a dataset is referenced on a target: qualified as
// "project.dataset" for BigQuery, the bare dataset name for every other
// target.
func datasetRef(target TargetName, project, dataset string) string {
	switch target {
	case TargetBigQuery:
		return project + "." + dataset
	default:
		return dataset
	}
}
// targetNames returns the name of every target, in order. The result is
// never nil.
func targetNames(targets []Target) []TargetName {
	names := make([]TargetName, 0, len(targets))
	for _, target := range targets {
		names = append(names, target.Name())
	}
	return names
}
// hostname returns the machine's hostname, or "" when it cannot be
// determined.
func hostname() string {
	if name, err := os.Hostname(); err == nil {
		return name
	}
	return ""
}
package runner
import (
"math"
"slices"
"time"
)
// LatencyStats summarizes repeated latency samples (post-warmup).
type LatencyStats struct {
// Min and Max are the smallest and largest samples used.
Min time.Duration `json:"min"`
// P50 and P90 are nearest-rank percentiles of the sorted samples.
P50 time.Duration `json:"p50"`
P90 time.Duration `json:"p90"`
Max time.Duration `json:"max"`
// N is the number of samples that remained after dropping warmup.
N int `json:"n"`
}
// PhaseStats summarizes per-phase timings across iterations, keyed by phase
// name (for example "total_engine", which EngineP50FromPhases reads).
type PhaseStats map[string]LatencyStats
// ComputeLatencyStats summarizes samples after discarding the first warmup
// entries. At least one sample is always kept: a warmup that would discard
// everything keeps only the last sample, and a negative warmup discards
// nothing. No samples at all yields the zero LatencyStats. The input slice
// is not modified.
func ComputeLatencyStats(samples []time.Duration, warmup int) LatencyStats {
	if len(samples) == 0 {
		return LatencyStats{}
	}
	from := max(min(warmup, len(samples)-1), 0)
	window := slices.Clone(samples[from:])
	slices.Sort(window)

	var stats LatencyStats
	stats.N = len(window)
	stats.Min = window[0]
	stats.Max = window[stats.N-1]
	stats.P50 = percentile(window, 0.50)
	stats.P90 = percentile(window, 0.90)
	return stats
}
// ComputePhaseStats aggregates phase timings (microseconds) across
// iterations, skipping the first warmup iterations. Each phase name seen in
// the remaining iterations gets LatencyStats over the iterations that
// reported it. At least one iteration is always kept: a warmup that would
// skip everything keeps only the last iteration, and a negative warmup skips
// nothing (clamped like ComputeLatencyStats and ComputeInt64P50, instead of
// indexing out of range). No iterations yields nil.
func ComputePhaseStats(iterations []map[string]int64, warmup int) PhaseStats {
	if len(iterations) == 0 {
		return nil
	}
	start := warmup
	if start >= len(iterations) {
		start = len(iterations) - 1
	}
	if start < 0 {
		start = 0
	}
	names := map[string]struct{}{}
	for i := start; i < len(iterations); i++ {
		for k := range iterations[i] {
			names[k] = struct{}{}
		}
	}
	out := make(PhaseStats, len(names))
	for name := range names {
		var samples []time.Duration
		for i := start; i < len(iterations); i++ {
			if us, ok := iterations[i][name]; ok {
				samples = append(samples, time.Duration(us)*time.Microsecond)
			}
		}
		// Warmup has already been applied above.
		out[name] = ComputeLatencyStats(samples, 0)
	}
	return out
}
// ComputeInt64P50 returns the nearest-rank median of samples after discarding
// the first warmup entries. At least one sample is always kept: a warmup that
// would discard everything keeps only the last sample, and a negative warmup
// discards nothing. No samples yields 0. The input slice is not modified.
func ComputeInt64P50(samples []int64, warmup int) int64 {
	if len(samples) == 0 {
		return 0
	}
	from := max(min(warmup, len(samples)-1), 0)
	window := slices.Clone(samples[from:])
	slices.Sort(window)

	last := len(window) - 1
	mid := int(math.Round(0.50 * float64(last)))
	mid = min(max(mid, 0), last)
	return window[mid]
}
// percentile returns the nearest-rank p-quantile (p in [0, 1]) of sorted,
// which must already be in ascending order. The rank is round(p * (n-1)),
// clamped to the valid index range. An empty slice yields 0.
func percentile(sorted []time.Duration, p float64) time.Duration {
	switch len(sorted) {
	case 0:
		return 0
	case 1:
		return sorted[0]
	}
	last := len(sorted) - 1
	rank := int(math.Round(p * float64(last)))
	rank = min(max(rank, 0), last)
	return sorted[rank]
}
package runner
import "net"
// freePort asks the OS for an unused TCP port on 127.0.0.1 by listening on
// port 0, reading the assigned port and closing the listener again. Nothing
// reserves the port afterwards, so another process may take it before the
// caller binds it.
func freePort() (int, error) {
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		return 0, err
	}
	port := listener.Addr().(*net.TCPAddr).Port
	_ = listener.Close()
	return port, nil
}
// CLI parsing for the gateway_main binary.
//
// The parser lives in its own file so it can be exercised by unit
// tests (cli_test.go) without forking a process. The output is a
// normalized Config struct that main() turns into a gateway.Options.
//
// # Flag aliasing
//
// Every operator-facing flag accepts both the legacy
// underscore-separated name this repository started with
// (`--http_port`) and the hyphen-separated equivalent documented for
// `gateway_main` (`--http-port`). Both names target
// the same parsed value; whichever appears last on the command line
// wins, the same way Go's `flag` package handles late overrides for
// any single flag. This keeps existing scripts/Taskfiles working
// while letting operators copy invocation snippets straight from the
// upstream documentation.
//
// A handful of flags additionally accept the names the widely-used
// goccy/bigquery-emulator exposes, so invocation snippets written for
// that emulator keep working here without rewriting:
//
// - `--port` -> alias for `--http-port`
// - `--project` -> alias for `--project-id`
// - `--data-from-yaml` -> alias for `--seed-data-file`
//
// These are pure aliases onto the same Config fields; the canonical
// names above remain the primary, `--help`-documented spelling.
//
// # Environment-variable fallbacks
//
// Three settings honor environment variables when the CLI flag is
// not supplied, per the stable compatibility contract in docs/SEEDING.md:
//
// - BIGQUERY_EMULATOR_INITIAL_DATA_DIR (also EMULATOR_INITIAL_DATA_DIR)
// populates --initial-data-dir.
// - BIGQUERY_EMULATOR_SEED_TOKEN populates --seed-api-seed-token.
// - BIGQUERY_EMULATOR_DATA_DIR populates --data-dir.
//
// All three are still overridable from the command line; the env
// vars are only consulted when the operator did not say anything.
package main
import (
"errors"
"flag"
"fmt"
"io"
"strconv"
"strings"
)
// Config is the normalized result of parsing argv. All operator-facing
// CLI flags collapse to one field here so the rest of the program can
// read settings without juggling pointer indirection or alias bookkeeping.
type Config struct {
// VersionRequested is true when `--version` was passed; main()
// short-circuits before any side effect.
VersionRequested bool
// ListenHost is the host the HTTP gateway binds to. Maps to
// `--listen-host` / `--hostname`.
ListenHost string
// HTTPPort is the BigQuery REST listener port. Maps to
// `--http-port` / `--http_port`.
HTTPPort int
// GRPCPort is the engine gRPC port. The gateway also dials this
// port via the loopback interface. Maps to `--grpc-port` /
// `--grpc_port`.
GRPCPort int
// EngineBinary is the absolute or basename path to the C++ engine
// subprocess. Empty disables the subprocess (gateway-only /
// unit-test mode). Maps to `--engine-binary` / `--engine_binary`.
EngineBinary string
// DataDir is the persistent storage root. Forwarded to the engine
// as `--data_dir`. Maps to `--data-dir` / `--data_dir` /
// `BIGQUERY_EMULATOR_DATA_DIR`.
DataDir string
// LegacyDatabase is the removed recidiviz/goccy `--database` flag
// (single SQLite file). When set, parseArgs maps it to DataDir =
// filepath.Dir(LegacyDatabase) and emits a migration warning.
LegacyDatabase string
// StartupWarnings holds operator-facing migration / layout notices
// collected during CLI parsing (parseArgs appends the data-dir layout
// warnings last). main() logs them before binding.
StartupWarnings []string
// InitialDataDir is a template directory the gateway copies into
// DataDir on first start (when DataDir does not yet contain an
// initialized catalog). Maps to `--initial-data-dir` /
// `BIGQUERY_EMULATOR_INITIAL_DATA_DIR` / `EMULATOR_INITIAL_DATA_DIR`.
InitialDataDir string
// CopyEngineStdout / CopyEngineStderr forward the engine
// subprocess's stdio to the gateway's. Maps to
// `--copy-engine-stdout` / `--copy_engine_stdout` /
// `--copy-engine-stderr` / `--copy_engine_stderr`.
CopyEngineStdout bool
CopyEngineStderr bool
// LogRequests prints each REST request and response. Maps to
// `--log-requests` / `--log_requests`.
LogRequests bool
// Debug enables verbose lifecycle logging. Maps to `--debug`.
Debug bool
// DefaultProjectID is the default project clients act against
// when seeding or applying YAML data without an explicit project.
// Maps to `--project-id` / `--project_id`.
DefaultProjectID string
// DefaultDatasetID is the server-level fallback dataset used to
// resolve unqualified (single-segment) table names when a query
// or job does not carry its own `defaultDataset`. This mirrors
// setting `default_dataset` on a production BigQuery client/job so
// `SELECT * FROM t` / `CREATE TABLE t (...)` resolve to
// `<project>.<DefaultDatasetID>.t`. Maps to `--dataset` /
// `--dataset-id` / `--dataset_id`.
DefaultDatasetID string
// DefaultDatasetLocation is the BigQuery location stamped on
// datasets created without an explicit location. Maps to
// `--default-dataset-location`.
DefaultDatasetLocation string
// EnableSeedAPI registers the `POST /api/emulator/seed` route
// and its operation polling endpoint. Off by default for local
// safety.
EnableSeedAPI bool
// SeedAPIAllowRemote allows non-loopback callers to invoke the
// seed API. Off by default.
SeedAPIAllowRemote bool
// SeedAPISeedToken, when non-empty, requires matching header
// `X-BigQuery-Emulator-Seed-Token` on every seed API request.
// Falls back to `BIGQUERY_EMULATOR_SEED_TOKEN` when the flag is
// empty.
SeedAPISeedToken string
// EnableSQLToolsAPI registers POST /api/emulator/sql/* routes.
EnableSQLToolsAPI bool
// SQLToolsAPIAllowRemote allows non-loopback SQL tools callers.
SQLToolsAPIAllowRemote bool
// SQLToolsAPISeedToken protects remote SQL tools access.
// NOTE(review): presumably checked the same way as SeedAPISeedToken
// (a required request header) -- confirm against the SQL tools handler.
SQLToolsAPISeedToken string
// SeedFiles is the repeatable list of YAML seed-data files to
// apply once the engine is SERVING. Maps to `--seed-data-file`
// / `--seed-yaml`.
SeedFiles []string
}
// envLookup mirrors `os.LookupEnv` for tests: it returns the value of key
// and whether the variable is set. Production code passes a lookup backed by
// the real environment; tests inject a deterministic map so they
// don't depend on the running process's environment.
type envLookup func(key string) (string, bool)
// parseArgs turns argv into a Config.
//
// argv must NOT contain the program name (pass os.Args[1:], matching
// the `flag.Parse()` contract). errOut receives the FlagSet's usage
// and error text -- os.Stderr in production, a buffer in tests.
// getenv supplies environment fallbacks; nil means "no environment"
// (noEnv), which keeps the function deterministic.
//
// The pipeline is, in order:
//  1. start from defaultConfig,
//  2. register every flag (with aliases) on a throwaway FlagSet and
//     parse argv,
//  3. validate the port numbers,
//  4. fill still-empty values from the environment,
//  5. translate the deprecated --database flag,
//  6. append data-dir layout warnings to Config.StartupWarnings.
//
// Any failure returns the zero Config together with the error; flag
// parse errors are wrapped so callers can still match flag.ErrHelp
// with errors.Is.
func parseArgs(argv []string, errOut io.Writer, getenv envLookup) (Config, error) {
	lookup := getenv
	if lookup == nil {
		lookup = noEnv
	}

	var wantVersion bool
	parsed := defaultConfig()
	flagSet := flag.NewFlagSet("gateway_main", flag.ContinueOnError)
	flagSet.SetOutput(errOut)
	registerFlags(flagSet, &parsed, &wantVersion)

	parseErr := flagSet.Parse(argv)
	if parseErr != nil {
		return Config{}, fmt.Errorf("parse flags: %w", parseErr)
	}
	parsed.VersionRequested = wantVersion

	portErr := validatePorts(parsed)
	if portErr != nil {
		return Config{}, portErr
	}

	applyEnvFallbacks(&parsed, lookup)

	legacyErr := applyLegacyDatabaseFlag(&parsed)
	if legacyErr != nil {
		return Config{}, legacyErr
	}

	layoutWarnings := collectDataDirLayoutWarnings(parsed.DataDir)
	parsed.StartupWarnings = append(parsed.StartupWarnings, layoutWarnings...)
	return parsed, nil
}
// noEnv is the envLookup parseArgs falls back to when handed nil.
// Every key is reported as unset (empty value, found == false), so
// tests that don't care about env fallbacks stay deterministic.
func noEnv(string) (string, bool) {
	const unset = ""
	return unset, false
}
// defaultConfig produces the baseline Config that flag parsing starts
// from. Keeping the defaults in one function makes them easy to find
// and lets tests assert against the same baseline parseArgs uses.
// Fields not assigned here keep their zero value.
func defaultConfig() Config {
	var cfg Config
	cfg.ListenHost = "localhost"
	cfg.HTTPPort = 9050
	cfg.GRPCPort = 9060
	cfg.EngineBinary = "emulator_main"
	cfg.CopyEngineStderr = true
	return cfg
}
// registerFlags wires every supported flag (including hyphen and
// underscore aliases) onto fs. Split out from parseArgs purely so
// the parser body stays under the funlen budget; nothing else
// invokes it.
//
// cfg receives the parsed values. Each flag's default is whatever
// cfg already holds when this function runs, because the register*
// helpers pass the target's current value as the flag default.
// versionFlag receives the value of --version. In every names slice
// the first entry is the canonical flag name; the rest are aliases
// bound to the same target.
func registerFlags(fs *flag.FlagSet, cfg *Config, versionFlag *bool) {
	// Listener host and ports.
	registerString(fs, &cfg.ListenHost, []string{"listen-host", "hostname"},
		"Hostname for the emulator servers.")
	registerInt(fs, &cfg.HTTPPort, []string{"http-port", "http_port", "port"},
		"Port on which to run the BigQuery REST gateway.")
	registerInt(fs, &cfg.GRPCPort, []string{"grpc-port", "grpc_port"},
		"Port on which to run the internal engine gRPC server.")
	// Engine subprocess and storage locations.
	registerString(fs, &cfg.EngineBinary, []string{"engine-binary", "engine_binary"},
		"Path to the C++ engine binary. Empty disables the subprocess.")
	registerString(fs, &cfg.DataDir,
		[]string{"data-dir", "data_dir"},
		"Persistent storage root. Passed to the engine as --data_dir. "+
			"Falls back to $BIGQUERY_EMULATOR_DATA_DIR when empty.")
	// --database is kept only so parseArgs can translate it into
	// --data-dir (see applyLegacyDatabaseFlag).
	registerString(fs, &cfg.LegacyDatabase,
		[]string{"database"},
		"DEPRECATED (recidiviz/goccy compat): single SQLite catalog file. "+
			"Mapped to --data-dir=<parent directory>. Prefer --data-dir.")
	registerString(fs, &cfg.InitialDataDir,
		[]string{"initial-data-dir"},
		"Template directory copied into --data-dir on first start when "+
			"--data-dir is empty. Falls back to $BIGQUERY_EMULATOR_INITIAL_DATA_DIR "+
			"/ $EMULATOR_INITIAL_DATA_DIR.")
	// Logging and engine output forwarding.
	registerBool(fs, &cfg.CopyEngineStdout, []string{"copy-engine-stdout", "copy_engine_stdout"},
		"Forward the engine subprocess's stdout to the gateway's.")
	registerBool(fs, &cfg.CopyEngineStderr, []string{"copy-engine-stderr", "copy_engine_stderr"},
		"Forward the engine subprocess's stderr to the gateway's.")
	registerBool(fs, &cfg.LogRequests, []string{"log-requests", "log_requests"},
		"Log every REST request and response.")
	registerBool(fs, &cfg.Debug, []string{"debug"},
		"Enable verbose lifecycle logging.")
	// Default project / dataset / location.
	registerString(fs, &cfg.DefaultProjectID, []string{"project-id", "project_id", "project"},
		"Default BigQuery project clients are assumed to act against.")
	registerString(fs, &cfg.DefaultDatasetID, []string{"dataset", "dataset-id", "dataset_id"},
		"Default dataset used to resolve unqualified table names when a "+
			"query/job does not set its own defaultDataset (e.g. SELECT * FROM t). "+
			"Mirrors default_dataset on a production BigQuery client.")
	registerString(fs, &cfg.DefaultDatasetLocation, []string{"default-dataset-location"},
		"Default BigQuery location stamped on datasets created without an "+
			"explicit location (e.g. US, EU).")
	// Seed API and seed files.
	registerBool(fs, &cfg.EnableSeedAPI, []string{"enable-seed-api"},
		"Register POST /api/emulator/seed and the operation polling endpoint.")
	registerBool(fs, &cfg.SeedAPIAllowRemote, []string{"seed-api-allow-remote"},
		"Allow non-loopback callers to invoke the seed API.")
	registerString(fs, &cfg.SeedAPISeedToken,
		[]string{"seed-api-seed-token"},
		"Required value for the X-BigQuery-Emulator-Seed-Token header on every "+
			"seed request. Falls back to $BIGQUERY_EMULATOR_SEED_TOKEN.")
	registerStringSlice(fs, &cfg.SeedFiles, []string{"seed-data-file", "seed-yaml", "data-from-yaml"},
		"YAML seed-data file to apply once the engine reports SERVING (repeatable).")
	// SQL tools API.
	registerBool(fs, &cfg.EnableSQLToolsAPI, []string{"enable-sql-tools-api"},
		"Register POST /api/emulator/sql/* parser/formatter/completion routes.")
	registerBool(fs, &cfg.SQLToolsAPIAllowRemote, []string{"sql-tools-api-allow-remote"},
		"Allow non-loopback callers to invoke the SQL tools API.")
	registerString(fs, &cfg.SQLToolsAPISeedToken,
		[]string{"sql-tools-api-token"},
		"Required value for the X-BigQuery-Emulator-SqlTools-Token header on every "+
			"SQL tools request. Falls back to $BIGQUERY_EMULATOR_SQL_TOOLS_TOKEN.")
	// --version is stored outside Config; parseArgs copies it into
	// Config.VersionRequested after parsing.
	registerBool(fs, versionFlag, []string{"version"},
		"Print version information (semver + git commit + build date + Go toolchain) and exit.")
}
// validatePorts rejects port configurations that cannot work:
//
//   - an HTTP or gRPC port outside 1..65535,
//   - identical HTTP and gRPC ports,
//   - and, when the engine subprocess is enabled (EngineBinary is
//     non-empty), an engine-internal port (--grpc-port + 1, see
//     engineInternalGRPCPort) that either falls outside the valid range
//     or lands on the HTTP port.
//
// The engine-port checks are skipped when EngineBinary is empty because
// ToOptions only derives an engine address when a binary is configured.
// Pulled out of parseArgs so the branches are testable and parseArgs
// stays short. Returns nil when the configuration is acceptable.
func validatePorts(cfg Config) error {
	if cfg.HTTPPort <= 0 || cfg.HTTPPort > 65535 {
		return fmt.Errorf("invalid --http-port %d: must be in 1..65535", cfg.HTTPPort)
	}
	if cfg.GRPCPort <= 0 || cfg.GRPCPort > 65535 {
		return fmt.Errorf("invalid --grpc-port %d: must be in 1..65535", cfg.GRPCPort)
	}
	if cfg.HTTPPort == cfg.GRPCPort {
		return fmt.Errorf("--http-port and --grpc-port must differ (both %d)", cfg.HTTPPort)
	}
	if cfg.EngineBinary == "" {
		// No engine subprocess, so no derived engine port to validate.
		return nil
	}
	enginePort := cfg.engineInternalGRPCPort()
	if enginePort > 65535 {
		return fmt.Errorf(
			"invalid --grpc-port %d: the engine subprocess listens on the next port (%d), which must be in 1..65535",
			cfg.GRPCPort, enginePort)
	}
	if cfg.HTTPPort == enginePort {
		return fmt.Errorf(
			"--http-port %d collides with the engine's internal gRPC port (--grpc-port %d + 1)",
			cfg.HTTPPort, cfg.GRPCPort)
	}
	return nil
}
// applyEnvFallbacks implements the "flag > env > nothing" precedence:
// a field that is still empty after flag parsing is filled from its
// environment variable(s), looked up through getenv. cfg is mutated in
// place and fields that already have a value are never touched.
//
// DataDir and the two token fields accept any variable that is set
// (even to the empty string, which is a no-op). InitialDataDir tries
// its variables in the listed order and takes the first one that is
// set to a non-empty value.
func applyEnvFallbacks(cfg *Config, getenv envLookup) {
	// fillIfEmpty copies the variable named key into *field when the
	// field has no value yet and the variable is set.
	fillIfEmpty := func(field *string, key string) {
		if *field != "" {
			return
		}
		if value, found := getenv(key); found {
			*field = value
		}
	}

	fillIfEmpty(&cfg.DataDir, "BIGQUERY_EMULATOR_DATA_DIR")

	if cfg.InitialDataDir == "" {
		initialKeys := [...]string{
			"BIGQUERY_EMULATOR_INITIAL_DATA_DIR",
			"EMULATOR_INITIAL_DATA_DIR",
		}
		// Stop at the first key that yields a non-empty value.
		for i := 0; i < len(initialKeys) && cfg.InitialDataDir == ""; i++ {
			if value, found := getenv(initialKeys[i]); found && value != "" {
				cfg.InitialDataDir = value
			}
		}
	}

	fillIfEmpty(&cfg.SeedAPISeedToken, "BIGQUERY_EMULATOR_SEED_TOKEN")
	fillIfEmpty(&cfg.SQLToolsAPISeedToken, "BIGQUERY_EMULATOR_SQL_TOOLS_TOKEN")
}
// engineInternalGRPCPort returns the port the engine subprocess
// listens on: one above --grpc-port. The public BigQuery Storage shim
// binds --grpc-port itself, so the engine takes the next port and both
// can coexist.
func (c Config) engineInternalGRPCPort() int {
	const engineOffset = 1
	return c.GRPCPort + engineOffset
}
// ToOptions projects the parsed CLI config onto the addresses and
// engine arguments the gateway runtime consumes.
//
// Returns:
//   - httpAddr: ListenHost:HTTPPort, the REST listener.
//   - storageGRPCAddr: ListenHost:GRPCPort, where client libraries dial
//     BIGQUERY_STORAGE_GRPC_ENDPOINT.
//   - engineAddr: ListenHost:(GRPCPort+1), the internal
//     bigquery_emulator.v1 listener the gateway dials. Left empty when
//     engineBinary is empty (no engine subprocess).
//   - engineArgs: the pass-through flags from engineCLIArgs.
//
// A ListenHost that contains a colon (an IPv6 literal such as "::1") is
// wrapped in square brackets so the result is a parsable host:port --
// the same rule net.JoinHostPort applies. Hosts that are already
// bracketed, and ordinary hostnames / IPv4 addresses, are used as-is.
func (c Config) ToOptions(engineBinary string) (httpAddr, storageGRPCAddr, engineAddr string, engineArgs []string) {
	host := c.ListenHost
	if strings.Contains(host, ":") && !strings.HasPrefix(host, "[") {
		// Without brackets "::1" + ":9050" becomes "::1:9050", which
		// the net package rejects ("too many colons in address").
		host = "[" + host + "]"
	}
	httpAddr = host + ":" + strconv.Itoa(c.HTTPPort)
	storageGRPCAddr = host + ":" + strconv.Itoa(c.GRPCPort)
	if engineBinary != "" {
		engineAddr = host + ":" + strconv.Itoa(c.engineInternalGRPCPort())
	}
	engineArgs = c.engineCLIArgs()
	return httpAddr, storageGRPCAddr, engineAddr, engineArgs
}
// engineCLIArgs builds the flat `--key value` argument list handed to
// the engine subprocess. Flags are always emitted in the
// double-hyphen, underscore-separated form (`--data_dir foo`),
// regardless of which alias the operator used on the gateway side.
//
// A flag whose value is empty is omitted entirely, so the engine keeps
// its own default for anything the operator didn't set. The result is
// always a non-nil slice, possibly empty.
func (c Config) engineCLIArgs() []string {
	forwarded := []struct{ name, value string }{
		{name: "--data_dir", value: c.DataDir},
	}
	out := make([]string, 0, 2*len(forwarded))
	for i := range forwarded {
		if entry := forwarded[i]; entry.value != "" {
			out = append(out, entry.name, entry.value)
		}
	}
	return out
}
// registerString binds one string target to every flag name in
// `names`. names[0] is the canonical flag and carries desc in
// `--help`; every later name is an alias whose usage text is the stub
// "(alias for --<canonical>)". The target's current value becomes the
// default of each flag. An empty names slice registers nothing.
func registerString(fs *flag.FlagSet, target *string, names []string, desc string) {
	for i, name := range names {
		usage := desc
		if i > 0 {
			usage = "(alias for --" + names[0] + ")"
		}
		fs.StringVar(target, name, *target, usage)
	}
}
// registerInt is the integer counterpart of registerString: it binds
// one int target to every name in `names` via FlagSet.IntVar, with
// names[0] owning desc and the remaining names documented as
// "(alias for --<canonical>)". The target's current value is the
// default. A value that is not an integer is rejected by the flag
// package when the FlagSet is parsed. An empty names slice registers
// nothing.
func registerInt(fs *flag.FlagSet, target *int, names []string, desc string) {
	for i, name := range names {
		usage := desc
		if i > 0 {
			usage = "(alias for --" + names[0] + ")"
		}
		fs.IntVar(target, name, *target, usage)
	}
}
// registerBool is the boolean counterpart of registerString: one bool
// target bound to every name in `names`, names[0] owning desc and the
// rest documented as "(alias for --<canonical>)". Because the flags
// are registered with FlagSet.BoolVar, the canonical name and every
// alias accept both the bare `--name` form and the explicit
// `--name=true` / `--name=false` forms. An empty names slice
// registers nothing.
func registerBool(fs *flag.FlagSet, target *bool, names []string, desc string) {
	for i, name := range names {
		usage := desc
		if i > 0 {
			usage = "(alias for --" + names[0] + ")"
		}
		fs.BoolVar(target, name, *target, usage)
	}
}
// registerStringSlice registers a repeatable string flag under every
// name in `names`. The standard flag package has no built-in notion
// of a repeated flag, so a single stringSliceValue is shared by the
// canonical name and all aliases; each occurrence of any of them
// appends to the same target slice. names[0] owns desc, aliases get
// the "(alias for --<canonical>)" stub. An empty names slice
// registers nothing.
func registerStringSlice(fs *flag.FlagSet, target *[]string, names []string, desc string) {
	if len(names) == 0 {
		return
	}
	shared := &stringSliceValue{target: target}
	primary, aliases := names[0], names[1:]
	fs.Var(shared, primary, desc)
	aliasUsage := "(alias for --" + primary + ")"
	for _, alias := range aliases {
		fs.Var(shared, alias, aliasUsage)
	}
}
// stringSliceValue adapts a *[]string to flag.Value so a flag can be
// given more than once. It backs registerStringSlice: every
// `--seed-data-file foo` appends "foo" to the target, preserving the
// order in which the operator supplied the values.
type stringSliceValue struct {
	target *[]string
}

// String renders the collected values joined by commas. A nil
// receiver or nil target renders as the empty string.
func (s *stringSliceValue) String() string {
	if s != nil && s.target != nil {
		return strings.Join(*s.target, ",")
	}
	return ""
}

// Set appends v to the target slice. It fails only when the receiver
// or its target is nil.
func (s *stringSliceValue) Set(v string) error {
	if s == nil || s.target == nil {
		return errors.New("stringSliceValue: nil target")
	}
	items := *s.target
	*s.target = append(items, v)
	return nil
}
// gateway_main is the BigQuery emulator's REST gateway entry point.
//
// It is structurally analogous to cloud-spanner-emulator's gateway_main:
// the C++ engine (emulator_main) implements SQL semantics on top of
// GoogleSQL, and this Go binary fronts it with a BigQuery-shaped REST API.
// On startup the gateway spawns the engine as a subprocess and shuts it
// down on exit.
//
// # CLI surface
//
// The parser is broken out into cli.go and supports both the legacy
// underscore-separated flag names this repository started with
// (`--http_port`) and the hyphen-separated equivalents
// (`--http-port`) documented for gateway_main. Every
// new operator-facing flag (data dir, engine pass-through, seed API,
// seed YAML files) is registered there; this file only wires the
// parsed Config into the gateway runtime.
package main
import (
"errors"
"flag"
"fmt"
"io"
"log" //nolint:depguard // process-launch + version-print error paths use stdlib log; gateway runtime emits structured slog via opts.Logger
"log/slog"
"os"
"path"
"path/filepath"
"runtime"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
"github.com/vantaboard/bigquery-emulator/gateway/seedfile"
"github.com/vantaboard/bigquery-emulator/gateway/storagetmpl"
)
// logStartupWarnings walks the notices collected during CLI parsing.
// Ordinary notices are printed through the standard logger. The first
// notice that starts with "ERROR: " stops the walk and is returned as
// an error (with the prefix removed) so startup fails instead of
// silently running with --data-dir pointed at a legacy single-file
// catalog path. Notices after that one are not printed. Returns nil
// when no notice is fatal.
func logStartupWarnings(warnings []string) error {
	for i := range warnings {
		fatal, isFatal := strings.CutPrefix(warnings[i], "ERROR: ")
		if !isFatal {
			log.Print(warnings[i])
			continue
		}
		return fmt.Errorf("%s", fatal)
	}
	return nil
}
// Version metadata. The defaults (`dev` / `none` / `unknown`) are what a
// plain `go build` produces; release builds replace them via
// `-X main.version=... -X main.commit=... -X main.date=...` ldflags
// (see `.goreleaser.yml` and `taskfiles/emulator.yml`'s `gateway:build`
// helper). Keep these as `var` (not `const`) so the linker can overwrite
// them — `const string` cannot be ldflag-injected.
var (
	// version is printed on the title line of the --version output.
	version = "dev"
	// commit is printed on the "commit" row of the --version output.
	commit = "none"
	// date is printed on the "built" row of the --version output.
	date = "unknown"
)
// printVersion writes the multi-line version block to w: a title line
// carrying the version, followed by one `key: value` row each for the
// commit, build date, Go toolchain and os/arch. It takes an io.Writer
// so unit tests can capture the output in a buffer instead of forking
// a process. The layout intentionally follows cloud-spanner-emulator's
// `gateway_main --version` output so operators who know one emulator
// can read the other.
func printVersion(w io.Writer) {
	// Write errors (e.g. a broken pipe from
	// `gateway_main --version | head -1`) are deliberately dropped:
	// the process is about to exit and there is nothing useful to do
	// about them.
	emit := func(format string, args ...any) {
		_, _ = fmt.Fprintf(w, format, args...)
	}
	emit("bigquery-emulator-gateway version %s\n", version)
	emit(" commit: %s\n", commit)
	emit(" built: %s\n", date)
	emit(" go: %s\n", runtime.Version())
	emit(" os/arch: %s/%s\n", runtime.GOOS, runtime.GOARCH)
}
// resolveEngineBinary turns the --engine-binary value into the path
// that should be executed, mirroring the resolution logic from
// cloud-spanner-emulator:
//
//   - "" means the engine subprocess is disabled; "" is returned.
//   - An absolute path is returned unchanged. Both the OS-native notion
//     of "absolute" (filepath.IsAbs, e.g. `C:\bin\emulator_main.exe` on
//     Windows) and a slash-rooted path (path.IsAbs) are accepted; the
//     latter keeps paths like "/opt/emulator_main" working on every
//     platform exactly as before.
//   - Anything else is looked up next to the gateway executable and then
//     in that directory's parent; the first existing candidate wins.
//
// The process is terminated via log.Fatalf when the gateway's own
// executable path cannot be determined or no candidate exists.
func resolveEngineBinary(name string) string {
	if name == "" {
		return ""
	}
	if filepath.IsAbs(name) || path.IsAbs(name) {
		return name
	}
	gwPath, err := os.Executable()
	if err != nil {
		log.Fatalf("could not resolve own executable path: %v", err)
	}
	gwDir := filepath.Dir(gwPath)
	for _, dir := range []string{gwDir, filepath.Dir(gwDir)} {
		candidate := filepath.Join(dir, name)
		if _, statErr := os.Stat(candidate); statErr == nil {
			return candidate
		}
	}
	log.Fatalf("could not locate engine binary %q in %q or its parent",
		name, gwDir)
	return "" // unreachable: log.Fatalf exits, but the compiler needs a return
}
// main parses the command line, handles the two early exits (help
// and --version), fails startup on any fatal startup warning, and
// otherwise runs the gateway until it returns.
func main() {
	cfg, err := parseArgs(os.Args[1:], os.Stderr, os.LookupEnv)
	switch {
	case err == nil:
		// Parsed cleanly; carry on.
	case errors.Is(err, flag.ErrHelp):
		// -h / --help: usage was already written by the FlagSet.
		os.Exit(0)
	default:
		log.Fatal(err)
	}

	if cfg.VersionRequested {
		printVersion(os.Stdout)
		return
	}

	warnErr := logStartupWarnings(cfg.StartupWarnings)
	if warnErr != nil {
		log.Fatal(warnErr)
	}

	runErr := runGateway(cfg)
	if runErr != nil {
		log.Fatal(runErr)
	}
}
// runGateway translates the parsed Config into gateway.Options,
// installs the two lifecycle hooks and runs the gateway, returning
// whatever Run returns.
//
// The pre-start hook hands InitialDataDir and DataDir to
// storagetmpl.MaybeMaterialize. The post-engine hook applies the
// configured seed files through the engine client's catalog; it is a
// no-op when no seed files are configured or no engine client is
// available.
func runGateway(cfg Config) error {
	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)

	httpAddr, storageGRPCAddr, engineAddr, engineArgs := cfg.ToOptions(cfg.EngineBinary)

	// --debug lowers the structured logger's threshold to Debug.
	level := slog.LevelInfo
	if cfg.Debug {
		level = slog.LevelDebug
	}
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level})

	opts := gateway.Options{
		HTTPAddress:            httpAddr,
		StorageGRPCAddress:     storageGRPCAddr,
		EngineAddress:          engineAddr,
		EngineBinary:           resolveEngineBinary(cfg.EngineBinary),
		EngineArgs:             engineArgs,
		CopyEngineStdout:       cfg.CopyEngineStdout,
		CopyEngineStderr:       cfg.CopyEngineStderr,
		LogRequests:            cfg.LogRequests,
		DefaultProjectID:       cfg.DefaultProjectID,
		DefaultDatasetID:       cfg.DefaultDatasetID,
		DefaultDatasetLocation: cfg.DefaultDatasetLocation,
		EnableSeedAPI:          cfg.EnableSeedAPI,
		SeedAPIAllowRemote:     cfg.SeedAPIAllowRemote,
		SeedAPISeedToken:       cfg.SeedAPISeedToken,
		EnableSQLToolsAPI:      cfg.EnableSQLToolsAPI,
		SQLToolsAPIAllowRemote: cfg.SQLToolsAPIAllowRemote,
		SQLToolsAPISeedToken:   cfg.SQLToolsAPISeedToken,
		SeedFiles:              cfg.SeedFiles,
		DataDir:                cfg.DataDir,
		InitialDataDir:         cfg.InitialDataDir,
		Debug:                  cfg.Debug,
		Logger:                 slog.New(handler),
	}

	materializeTemplate := func(o gateway.Options) error {
		return storagetmpl.MaybeMaterialize(o.InitialDataDir, o.DataDir)
	}
	applySeedFiles := func(o gateway.Options, ec *engine.Client) error {
		if ec == nil || len(o.SeedFiles) == 0 {
			return nil
		}
		applier := seed.NewCatalogApplier(ec.Catalog)
		return seedfile.ApplyFiles(o.SeedFiles, applier, gateway.DefaultsFromOptions(o))
	}

	return gateway.New(opts).
		WithPreStartHook(materializeTemplate).
		WithPostEngineHook(applySeedFiles).
		Run()
}
// Legacy --database flag handling and data-dir layout warnings for operators
// migrating from the recidiviz/goccy single-file SQLite catalog to this
// emulator's directory-based DuckDB layout.
package main
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// legacyDatabaseMigrationDoc is the documentation reference appended
// to the --database / --data-dir migration errors and warnings
// produced in this file.
const legacyDatabaseMigrationDoc = "docs/REST_API.md#persistence-and-data-dir"
// applyLegacyDatabaseFlag translates the removed recidiviz/goccy
// `--database=file.db` flag into `--data-dir=<parent directory>`.
//
// It does nothing when --database was not given. When a data dir is
// already set as well, it returns an error telling the operator which
// --data-dir value replaces their --database value. Otherwise it sets
// cfg.DataDir to the parent directory of the legacy file and appends a
// deprecation notice to cfg.StartupWarnings.
func applyLegacyDatabaseFlag(cfg *Config) error {
	legacy := cfg.LegacyDatabase
	if legacy == "" {
		return nil
	}

	parent := filepath.Dir(legacy)
	if cfg.DataDir != "" {
		return fmt.Errorf(
			"cannot use both --database and --data-dir; replace --database=%q with --data-dir=%q (see %s)",
			legacy,
			parent,
			legacyDatabaseMigrationDoc,
		)
	}

	cfg.DataDir = parent
	notice := fmt.Sprintf(
		"DEPRECATED: --database is removed. The recidiviz/goccy fork stored catalog state "+
			"in a single SQLite file (%q); this emulator persists under a directory "+
			"(--data-dir) with catalog.duckdb and sidecar parquet/meta.json files. "+
			"Mapped --database -> --data-dir=%q. Data in the old single-file format is "+
			"not automatically loaded; mount the volume at --data-dir and migrate or "+
			"re-seed if needed. See %s.",
		legacy,
		parent,
		legacyDatabaseMigrationDoc,
	)
	cfg.StartupWarnings = append(cfg.StartupWarnings, notice)
	return nil
}
// collectDataDirLayoutWarnings inspects dataDir on disk and returns
// notices for layouts that suggest the operator pointed --data-dir at
// a legacy single-file catalog, or left old SQLite files behind on a
// shared volume after switching flags.
//
// Outcomes:
//   - empty dataDir, or a path that does not exist: no notices.
//   - stat or directory-read failure: a single "WARN:" notice.
//   - dataDir is a regular file: a single "ERROR:" notice.
//   - otherwise: whatever legacyDataDirWarnings derives from the
//     directory's top-level entries.
func collectDataDirLayoutWarnings(dataDir string) []string {
	if dataDir == "" {
		return nil
	}

	info, statErr := os.Stat(dataDir)
	switch {
	case statErr == nil:
		// Path exists; inspect it below.
	case os.IsNotExist(statErr):
		return nil
	default:
		return []string{fmt.Sprintf("WARN: cannot stat --data-dir %q: %v", dataDir, statErr)}
	}

	if !info.IsDir() {
		notice := fmt.Sprintf(
			"ERROR: --data-dir %q is a file, not a directory. The recidiviz/goccy "+
				"--database=/path/catalog.db flag pointed at a single SQLite file; "+
				"this emulator expects --data-dir=/parent/directory. See %s.",
			dataDir,
			legacyDatabaseMigrationDoc,
		)
		return []string{notice}
	}

	hasCatalog, legacyDB, readErr := scanDataDirRoot(dataDir)
	if readErr == "" {
		return legacyDataDirWarnings(dataDir, hasCatalog, legacyDB)
	}
	return []string{readErr}
}
// scanDataDirRoot lists the top level of dataDir (sub-directories are
// skipped, nothing is walked recursively) and reports:
//
//   - hasCatalog: a file named exactly "catalog.duckdb" is present;
//   - legacyDB: the names of files whose lower-cased name ends in
//     ".db", ".sqlite" or ".sqlite3", in directory-listing order;
//   - readErr: a ready-to-log "WARN:" message when the directory cannot
//     be read, "" otherwise.
func scanDataDirRoot(dataDir string) (hasCatalog bool, legacyDB []string, readErr string) {
	entries, err := os.ReadDir(dataDir)
	if err != nil {
		return false, nil, fmt.Sprintf("WARN: cannot read --data-dir %q: %v", dataDir, err)
	}

	legacySuffixes := [...]string{".db", ".sqlite", ".sqlite3"}
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if name == "catalog.duckdb" {
			hasCatalog = true
			continue
		}
		lowered := strings.ToLower(name)
		for _, suffix := range legacySuffixes {
			if strings.HasSuffix(lowered, suffix) {
				legacyDB = append(legacyDB, name)
				break
			}
		}
	}
	return hasCatalog, legacyDB, ""
}
// legacyDataDirWarnings turns the result of a data-dir scan into at
// most one "WARN:" notice. No legacy database files means no notice.
// Legacy files next to a catalog.duckdb are reported as ignored
// leftovers; legacy files without a catalog.duckdb are reported as
// state that will not be loaded automatically.
func legacyDataDirWarnings(dataDir string, hasCatalog bool, legacyDB []string) []string {
	var notice string
	switch {
	case len(legacyDB) == 0:
		return nil
	case hasCatalog:
		notice = fmt.Sprintf(
			"WARN: --data-dir %q contains legacy single-file database(s) %v alongside "+
				"catalog.duckdb; the old SQLite files are ignored. Safe to delete after "+
				"confirming catalog.duckdb has your data.",
			dataDir,
			legacyDB,
		)
	default:
		notice = fmt.Sprintf(
			"WARN: --data-dir %q contains file(s) %v that look like the recidiviz/goccy "+
				"single-file SQLite catalog, but no catalog.duckdb from this emulator. "+
				"State from the old format is not loaded automatically; point --data-dir "+
				"at an empty directory or re-seed. See %s.",
			dataDir,
			legacyDB,
			legacyDatabaseMigrationDoc,
		)
	}
	return []string{notice}
}
// differential-record captures production BigQuery output for the differential corpus.
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"cloud.google.com/go/bigquery"
"github.com/vantaboard/bigquery-emulator/conformance/differential"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"google.golang.org/api/iterator"
)
// main runs the recorder and exits with status 2, after printing the
// error to stderr, when it fails.
func main() {
	err := run()
	if err == nil {
		return
	}
	_, _ = fmt.Fprintln(os.Stderr, "differential-record:", err)
	os.Exit(2)
}
// run parses the recorder's flags, resolves the GCP project (the
// --project flag first, then $BIGQUERY_DIFFERENTIAL_PROJECT), loads the
// corpus and records one oracle per corpus case against production
// BigQuery.
//
// It returns nil without doing any work when help was requested or when
// no project is configured (after printing the skip instructions). The
// first case that fails aborts the run; its error is wrapped with the
// case name.
//
// SIGINT / SIGTERM cancel the context handed to the BigQuery client.
// signal.NotifyContext owns the signal plumbing, so there is no helper
// goroutine left blocked on a channel when no signal ever arrives.
func run() error {
	fs := flag.NewFlagSet("differential-record", flag.ContinueOnError)
	fs.SetOutput(os.Stderr)
	corpus := fs.String("corpus", differential.DefaultCorpusDir, "corpus directory or single YAML")
	oracleDir := fs.String("oracle-dir", differential.DefaultOracleDir, "directory to write oracle JSON files")
	project := fs.String("project", "", "GCP project (default: BIGQUERY_DIFFERENTIAL_PROJECT)")
	dryRun := fs.Bool("dry-run", false, "print actions without writing oracle files")
	if err := fs.Parse(os.Args[1:]); err != nil {
		if errors.Is(err, flag.ErrHelp) {
			return nil
		}
		return err
	}

	projectID := strings.TrimSpace(*project)
	if projectID == "" {
		projectID = strings.TrimSpace(os.Getenv("BIGQUERY_DIFFERENTIAL_PROJECT"))
	}
	if projectID == "" {
		// Recording is opt-in: no project means "skip", not "fail".
		printSkipInstructions()
		return nil
	}

	cases, err := differential.LoadCorpusDir(*corpus, false)
	if err != nil {
		return err
	}

	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	client, err := bigquery.NewClient(ctx, projectID)
	if err != nil {
		return fmt.Errorf("bigquery client: %w", err)
	}
	defer func() { _ = client.Close() }()

	for _, c := range cases {
		if err := recordCase(ctx, client, projectID, *oracleDir, c, *dryRun); err != nil {
			return fmt.Errorf("%s: %w", c.Name, err)
		}
	}
	return nil
}
// printSkipInstructions writes to stderr the explanation shown when
// no GCP project is configured: how to configure one, and how to pin
// oracle expectations when GCP access is unavailable.
func printSkipInstructions() {
	const notice = `differential-record: skipped — no GCP project configured.
Set BIGQUERY_DIFFERENTIAL_PROJECT to a project where you can create ephemeral
datasets, or pass --project=<id>. Application Default Credentials must be
available (gcloud auth application-default login).
The committed oracle JSON under conformance/differential/oracle/ is what CI
replays; recording is manual/opt-in. When GCP access is unavailable, pin
oracle expectations from bq query output and mark oracle_source: bq-cli in
the corpus YAML (see .cursor/rules/conformance-bq-validation.mdc).`
	_, _ = fmt.Fprintln(os.Stderr, notice)
}
// recordCase records the oracle for a single corpus case. It creates
// an ephemeral dataset (named after the sanitized case name plus the
// current Unix time, located in "US"), applies the case's setup steps,
// captures the query result as an oracle, and finally deletes the
// ephemeral dataset with its contents.
//
// Cleanup runs on its own bounded context rather than ctx: when ctx
// has been cancelled (Ctrl-C, SIGTERM) a delete issued on it would
// fail immediately and leave the dataset behind in the real project.
// A failed delete is reported on stderr so the operator can remove the
// dataset by hand; it does not change the returned error.
func recordCase(
	ctx context.Context,
	client *bigquery.Client,
	project, oracleDir string,
	c *differential.CorpusCase,
	dryRun bool,
) error {
	dsID := fmt.Sprintf("diff_record_%s_%d", sanitizeDatasetID(c.Name), time.Now().Unix())
	ds := client.Dataset(dsID)
	if err := ds.Create(ctx, &bigquery.DatasetMetadata{Location: "US"}); err != nil {
		return fmt.Errorf("create dataset %s: %w", dsID, err)
	}
	defer func() {
		cleanupCtx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
		defer cancel()
		if err := ds.DeleteWithContents(cleanupCtx); err != nil {
			_, _ = fmt.Fprintf(os.Stderr,
				"differential-record: warning: delete dataset %s: %v\n", dsID, err)
		}
	}()
	if err := applySetup(ctx, client, project, dsID, c); err != nil {
		return err
	}
	return captureQueryOracle(ctx, client, project, dsID, oracleDir, c, dryRun)
}
// captureQueryOracle runs the case's query and writes the outcome as
// an oracle via writeOracle.
//
// The query runs with project as its default project and dsID as its
// default dataset, unless the case names its own DefaultDataset. Query
// parameters from the case are attached when present.
//
// A query that runs but finishes with a job error is still recorded:
// the oracle has Success == false and carries the error message. A
// successful query is recorded with its schema, rows and a job
// reference (location "US"). Failures to start the job, wait for it,
// or read its results are returned as errors and nothing is written.
func captureQueryOracle(
	ctx context.Context,
	client *bigquery.Client,
	project, dsID, oracleDir string,
	c *differential.CorpusCase,
	dryRun bool,
) error {
	query := client.Query(c.Query)
	query.DefaultProjectID = project
	if c.DefaultDataset != "" {
		query.DefaultDatasetID = c.DefaultDataset
	} else {
		query.DefaultDatasetID = dsID
	}
	if len(c.QueryParameters) > 0 {
		query.Parameters = toBQParams(c.QueryParameters)
	}

	job, err := query.Run(ctx)
	if err != nil {
		return fmt.Errorf("run query: %w", err)
	}
	status, err := job.Wait(ctx)
	if err != nil {
		return fmt.Errorf("wait job: %w", err)
	}
	jobID := job.ID()

	if jobErr := status.Err(); jobErr != nil {
		failed := &differential.Oracle{
			Project:      project,
			OracleSource: "recorded",
			Match:        c.Match,
			Success:      false,
			JobID:        jobID,
			Error:        &differential.OracleError{Message: jobErr.Error()},
		}
		return writeOracle(oracleDir, c, failed, dryRun)
	}

	it, err := job.Read(ctx)
	if err != nil {
		return fmt.Errorf("read results: %w", err)
	}
	schema, rows, err := readAllRows(it)
	if err != nil {
		return err
	}

	succeeded := &differential.Oracle{
		Project:      project,
		OracleSource: "recorded",
		Match:        c.Match,
		Success:      true,
		JobID:        jobID,
		Schema:       schema,
		Rows:         rows,
		JobReference: &bqtypes.JobReference{
			ProjectID: project,
			JobID:     jobID,
			Location:  "US",
		},
	}
	return writeOracle(oracleDir, c, succeeded, dryRun)
}
// writeOracle persists o at oracleDir/<c.OracleRef>. With dryRun set
// it only prints the path it would have written and returns nil.
//
// The directory that will actually contain the file is created first
// (mode 0750). That is the parent of the joined path, not merely
// oracleDir, so an OracleRef that contains sub-directories works, and
// an empty oracleDir resolves to the current directory.
func writeOracle(oracleDir string, c *differential.CorpusCase, o *differential.Oracle, dryRun bool) error {
	target := filepath.Join(oracleDir, c.OracleRef)
	if dryRun {
		_, _ = fmt.Fprintf(os.Stderr, "would write oracle %s for %s\n", target, c.Name)
		return nil
	}
	if err := os.MkdirAll(filepath.Dir(target), 0o750); err != nil {
		return fmt.Errorf("create oracle directory for %s: %w", target, err)
	}
	return differential.WriteOracle(target, o)
}
// applySetup executes the case's setup steps in order against
// production BigQuery. Each step is interpreted by the first field it
// populates, checked in this order: Dataset (create a dataset in
// "US"), Table (create a table or view), Rows (insert rows), SQL (run
// a statement with project / dsID as defaults and wait for it). A step
// that populates none of these is rejected. The first failure aborts
// setup; its error is prefixed with the step index.
func applySetup(ctx context.Context, client *bigquery.Client, project, dsID string, c *differential.CorpusCase) error {
	for i, step := range c.Setup {
		if step.Dataset != "" {
			created := client.Dataset(step.Dataset)
			if err := created.Create(ctx, &bigquery.DatasetMetadata{Location: "US"}); err != nil {
				return fmt.Errorf("setup[%d] dataset %s: %w", i, step.Dataset, err)
			}
			continue
		}
		if step.Table != nil {
			if err := createTable(ctx, client, step.Table); err != nil {
				return fmt.Errorf("setup[%d] table: %w", i, err)
			}
			continue
		}
		if step.Rows != nil {
			if err := insertRows(ctx, client, step.Rows); err != nil {
				return fmt.Errorf("setup[%d] rows: %w", i, err)
			}
			continue
		}
		if strings.TrimSpace(step.SQL) == "" {
			return fmt.Errorf("setup[%d]: unsupported step for recorder", i)
		}

		stmt := client.Query(step.SQL)
		stmt.DefaultProjectID = project
		stmt.DefaultDatasetID = dsID
		job, err := stmt.Run(ctx)
		if err != nil {
			return fmt.Errorf("setup[%d] sql run: %w", i, err)
		}
		status, err := job.Wait(ctx)
		if err != nil {
			return fmt.Errorf("setup[%d] sql wait: %w", i, err)
		}
		if jobErr := status.Err(); jobErr != nil {
			return fmt.Errorf("setup[%d] sql error: %w", i, jobErr)
		}
	}
	return nil
}
// createTable creates the table described by t in dataset t.Dataset.
// A setup with a View becomes a view over View.Query; otherwise a
// non-empty Schema is converted with toBQSchema. With neither, the
// table is created from empty metadata.
func createTable(ctx context.Context, client *bigquery.Client, t *runner.TableSetup) error {
	var meta bigquery.TableMetadata
	switch {
	case t.View != nil:
		meta.ViewQuery = t.View.Query
	case len(t.Schema) > 0:
		meta.Schema = toBQSchema(t.Schema)
	}
	return client.Dataset(t.Dataset).Table(t.ID).Create(ctx, &meta)
}
// insertRows inserts rs.Rows into table rs.Table of dataset
// rs.Dataset through the table's Inserter.
func insertRows(ctx context.Context, client *bigquery.Client, rs *runner.RowsSetup) error {
	table := client.Dataset(rs.Dataset).Table(rs.Table)
	return table.Inserter().Put(ctx, rs.Rows)
}
// toBQSchema converts corpus schema columns into a bigquery.Schema,
// one field per column in the same order. The column type is
// upper-cased and used as the field type. The mode is matched
// case-insensitively: "REQUIRED" sets Required, "REPEATED" sets
// Repeated, and any other mode leaves both unset.
func toBQSchema(cols []runner.SchemaColumn) bigquery.Schema {
	schema := make(bigquery.Schema, len(cols))
	for i, col := range cols {
		mode := strings.ToUpper(col.Mode)
		schema[i] = &bigquery.FieldSchema{
			Name:     col.Name,
			Type:     bigquery.FieldType(strings.ToUpper(col.Type)),
			Required: mode == "REQUIRED",
			Repeated: mode == "REPEATED",
		}
	}
	return schema
}
// toBQParams converts the corpus query parameters into
// bigquery.QueryParameter values, copying Name and Value as-is and
// preserving order.
func toBQParams(params []differential.QueryParameterYAML) []bigquery.QueryParameter {
	converted := make([]bigquery.QueryParameter, len(params))
	for i := range params {
		converted[i] = bigquery.QueryParameter{
			Name:  params[i].Name,
			Value: params[i].Value,
		}
	}
	return converted
}
// readAllRows drains it and returns the result schema together with
// every row converted to the wire representation (one bqtypes.Cell
// per column, encoded by wireCell). The first iteration error other
// than iterator.Done aborts the read and is returned as-is.
//
// The schema is taken from the iterator only after iteration has
// finished: the BigQuery client documents RowIterator.Schema as
// possibly being populated only after the first call to Next, so
// reading it up front could record a nil schema.
func readAllRows(it *bigquery.RowIterator) (*bqtypes.TableSchema, []bqtypes.Row, error) {
	var rows []bqtypes.Row
	for {
		var vals []bigquery.Value
		err := it.Next(&vals)
		if errors.Is(err, iterator.Done) {
			break
		}
		if err != nil {
			return nil, nil, err
		}
		row := bqtypes.Row{F: make([]bqtypes.Cell, len(vals))}
		for i, v := range vals {
			row.F[i] = bqtypes.Cell{V: wireCell(v)}
		}
		rows = append(rows, row)
	}
	return wireSchema(it.Schema), rows, nil
}
// wireSchema converts a client-library schema into the wire-format
// table schema. A nil schema yields nil. Each field keeps its name
// and type; the mode is "REPEATED" when the field is repeated,
// otherwise "REQUIRED" when it is required, otherwise "NULLABLE".
func wireSchema(s bigquery.Schema) *bqtypes.TableSchema {
	if s == nil {
		return nil
	}
	out := &bqtypes.TableSchema{Fields: make([]bqtypes.TableFieldSchema, 0, len(s))}
	for _, field := range s {
		var mode string
		switch {
		case field.Repeated:
			mode = "REPEATED"
		case field.Required:
			mode = "REQUIRED"
		default:
			mode = "NULLABLE"
		}
		out.Fields = append(out.Fields, bqtypes.TableFieldSchema{
			Name: field.Name,
			Type: string(field.Type),
			Mode: mode,
		})
	}
	return out
}
// wireCell encodes one result value for the oracle file. nil stays
// nil; every other value becomes a string: strings verbatim, int64 in
// base 10, float64 via the %g verb, bool as "true" / "false", and any
// other type as its JSON encoding (a marshalling error is ignored and
// yields whatever bytes json.Marshal returned).
func wireCell(v bigquery.Value) any {
	switch typed := v.(type) {
	case nil:
		return nil
	case string:
		return typed
	case int64:
		return strconv.FormatInt(typed, 10)
	case float64:
		return fmt.Sprintf("%g", typed)
	case bool:
		return strconv.FormatBool(typed)
	}
	encoded, _ := json.Marshal(v)
	return string(encoded)
}
// sanitizeDatasetID derives a dataset-ID fragment from a case name:
// the name is lower-cased, every rune other than a-z, 0-9 and '_' is
// replaced by '_', and the result is cut to at most 40 bytes. The
// output is pure ASCII, so the byte-based cut never splits a rune.
func sanitizeDatasetID(name string) string {
	const maxLen = 40
	cleaned := strings.Map(func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z', '0' <= r && r <= '9', r == '_':
			return r
		default:
			return '_'
		}
	}, strings.ToLower(name))
	if len(cleaned) <= maxLen {
		return cleaned
	}
	return cleaned[:maxLen]
}
// differential replays committed production-BigQuery oracles against the emulator.
package main
import (
"context"
"errors"
"flag"
"fmt"
"io"
"os"
"os/signal"
"path/filepath"
"syscall"
"github.com/vantaboard/bigquery-emulator/conformance/differential"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
)
// main runs the differential replay. A run error is printed to stderr
// and exits with status 2; otherwise a non-zero code returned by run
// becomes the process exit status.
func main() {
	code, err := run()
	switch {
	case err != nil:
		_, _ = fmt.Fprintln(os.Stderr, "differential:", err)
		os.Exit(2)
	case code != 0:
		os.Exit(code)
	}
}
// run parses the flags, replays the corpus against the emulator via
// differential.Run and returns the report's exit code. An error is
// returned for flag problems, output-file setup failures and harness
// failures; a help request returns (0, nil).
//
// --connect and --engine-binary are mutually exclusive. Because
// --engine-binary has a non-empty default, "was it set explicitly" is
// approximated by "differs from the default"; when --connect is given
// the engine binary is cleared so the harness does not spawn one.
//
// SIGINT / SIGTERM cancel the context handed to the harness.
// signal.NotifyContext owns the signal plumbing, so there is no helper
// goroutine left blocked on a channel when no signal ever arrives.
func run() (int, error) {
	fs := flag.NewFlagSet("differential", flag.ContinueOnError)
	fs.SetOutput(os.Stderr)
	corpus := fs.String("corpus", differential.DefaultCorpusDir, "corpus directory or single YAML")
	oracleDir := fs.String("oracle-dir", differential.DefaultOracleDir, "directory of committed oracle JSON files")
	engineBinary := fs.String("engine-binary", "./bin/emulator_main", "path to emulator_main")
	connect := fs.String("connect", "", "HOST:PORT of a running gateway (mutually exclusive with --engine-binary)")
	profile := fs.String("profile", "duckdb", "runtime profile")
	output := fs.String("output", "text", "output format: text or json")
	outputFile := fs.String("output-file", "", "tee report to this file (atomic write)")
	includeSelfTest := fs.Bool("include-selftest", false, "run _-prefixed self-test corpus files")
	if err := fs.Parse(os.Args[1:]); err != nil {
		if errors.Is(err, flag.ErrHelp) {
			return 0, nil
		}
		return 0, err
	}
	if *engineBinary != "" && *connect != "" && *engineBinary != "./bin/emulator_main" {
		return 0, errors.New("--engine-binary and --connect are mutually exclusive")
	}
	if *connect != "" {
		*engineBinary = ""
	}

	out, cleanup, err := setupOutputFile(*outputFile)
	if err != nil {
		return 0, err
	}
	if cleanup != nil {
		defer cleanup()
	}

	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	report, err := differential.Run(ctx, differential.Options{
		CorpusDir:       *corpus,
		OracleDir:       *oracleDir,
		IncludeSelfTest: *includeSelfTest,
		Harness: runner.HarnessOptions{
			EngineBinary:   *engineBinary,
			ConnectAddress: *connect,
			EngineStdout:   os.Stderr,
			EngineStderr:   os.Stderr,
		},
		Profile: *profile,
		Output:  *output,
		Out:     out,
		Err:     os.Stderr,
	})
	if err != nil {
		return 0, err
	}
	return report.ExitCode(), nil
}
// setupOutputFile prepares the destination for the rendered report.
//
// With an empty path it returns os.Stdout and a nil cleanup. Otherwise it
// creates a temporary file in the same directory as path and returns a writer
// that tees into both os.Stdout and that file, plus a cleanup func the caller
// must invoke once writing is done. cleanup closes the temp file and renames
// it onto path; close and rename failures are reported on stderr rather than
// returned, and a failed rename also removes the temp file.
//
// The only error returned is a failure to create the temp file.
func setupOutputFile(path string) (io.Writer, func(), error) {
	if path == "" {
		return os.Stdout, nil, nil
	}
	// filepath.Dir never returns "" (a bare file name yields "."), so its
	// result can be handed to CreateTemp directly.
	tmp, err := os.CreateTemp(filepath.Dir(path), ".differential-*.tmp")
	if err != nil {
		return nil, nil, fmt.Errorf("create --output-file tmp: %w", err)
	}
	tmpName := tmp.Name()
	cleanup := func() {
		if err := tmp.Close(); err != nil {
			// Report the failure but still attempt the rename below: a
			// possibly truncated report is more useful than none.
			_, _ = fmt.Fprintln(os.Stderr, "differential: close --output-file tmp:", err)
		}
		if err := os.Rename(tmpName, path); err != nil {
			_, _ = fmt.Fprintln(os.Stderr, "differential: rename --output-file:", err)
			_ = os.Remove(tmpName)
		}
	}
	return io.MultiWriter(os.Stdout, tmp), cleanup, nil
}
// genbqutils converts a bigquery-utils extractor JSON manifest into native
// conformance YAML fixtures under conformance/thirdparty-fixtures/bigquery_utils/.
package main
import (
"encoding/json"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
"gopkg.in/yaml.v3"
)
// manifestCase is one upstream test case for a UDF, as decoded from the
// extractor's JSON manifest. Its fields are spliced verbatim into the
// generated SQL by buildQuery and buildUdafQuery.
type manifestCase struct {
// Inputs are the argument expressions; buildQuery joins them with ", " to
// form the UDF call.
Inputs []string `json:"inputs,omitempty"`
// ExpectedOutput is the expression the generated query wraps in
// TO_JSON_STRING and compares against the actual result.
ExpectedOutput string `json:"expected_output"`
// InputColumns is read only by buildUdafQuery. Entries containing
// " NOT AGGREGATE" are passed to the UDAF as literals; all other entries are
// aliased as test_input_N columns.
InputColumns []string `json:"input_columns,omitempty"`
// InputRows is read only by buildUdafQuery, which places it inside the
// generated FROM clause.
InputRows string `json:"input_rows,omitempty"`
}
// manifestUDF describes one emitted function in the manifest: where it came
// from upstream, how to create it, and the cases to run against it.
type manifestUDF struct {
// Family becomes the output sub-directory and part of the fixture name.
Family string `json:"family"`
// Name is the function name called in the generated query; it also names
// the output file and seeds the fixture's project ID.
Name string `json:"name"`
// Kind selects the query builder: "udaf" uses buildUdafQuery, anything else
// (including empty) uses the scalar path in buildQuery.
Kind string `json:"kind,omitempty"`
// UpstreamSQLX and UpstreamTestCases are only written into the provenance
// header (the latter reduced to its base name).
UpstreamSQLX string `json:"upstream_sqlx"`
UpstreamTestCases string `json:"upstream_test_cases"`
// CreateSQL is whitespace-trimmed and used as the fixture's single setup step.
CreateSQL string `json:"create_sql"`
// Cases holds the test cases; each one yields one row in the fixture's
// expected result.
Cases []manifestCase `json:"cases"`
}
// manifest is the top-level JSON document read from stdin by main.
type manifest struct {
// SourceSHA is recorded in each fixture's provenance header; when empty the
// header says "unknown".
SourceSHA string `json:"source_sha"`
// Emitted lists the UDFs to generate fixtures for.
Emitted []manifestUDF `json:"emitted"`
// Skipped is decoded but not otherwise read in this file.
Skipped []struct {
Family string `json:"family"`
Name string `json:"name"`
Reason string `json:"reason"`
} `json:"skipped"`
}
// nonAlnum matches each run of characters other than lowercase ASCII letters
// and digits. fixtureName and projectID lowercase their input before applying
// it, so uppercase letters never reach the pattern there.
var nonAlnum = regexp.MustCompile(`[^a-z0-9]+`)
// main reads a bigquery-utils manifest as JSON from stdin, recreates the
// --out-dir directory (relative paths are resolved against the repo root),
// and writes one fixture per emitted UDF. Any failure is reported through
// fatal, which terminates the process.
func main() {
	outDir := flag.String(
		"out-dir",
		"conformance/thirdparty-fixtures/bigquery_utils/known_failing",
		"output root (wiped each run)",
	)
	flag.Parse()

	raw, readErr := io.ReadAll(os.Stdin)
	if readErr != nil {
		fatal("read stdin: %v", readErr)
	}
	var m manifest
	if parseErr := json.Unmarshal(raw, &m); parseErr != nil {
		fatal("parse manifest: %v", parseErr)
	}

	root, rootErr := repoRoot()
	if rootErr != nil {
		fatal("%v", rootErr)
	}
	dest := *outDir
	if !filepath.IsAbs(dest) {
		dest = filepath.Join(root, dest)
	}
	if wipeErr := wipeDir(dest); wipeErr != nil {
		fatal("wipe %s: %v", dest, wipeErr)
	}

	for _, udf := range m.Emitted {
		if writeErr := writeFixture(root, dest, m.SourceSHA, udf); writeErr != nil {
			fatal("write %s/%s: %v", udf.Family, udf.Name, writeErr)
		}
	}
	fmt.Fprintf(os.Stderr, "genbqutils: wrote %d fixtures to %s\n", len(m.Emitted), dest)
}
// repoRoot returns the nearest directory, starting at the working directory
// and moving up through its parents, that contains a go.mod entry. It returns
// an error when the working directory cannot be determined or when the
// filesystem root is reached without finding one.
func repoRoot() (string, error) {
	start, err := os.Getwd()
	if err != nil {
		return "", err
	}
	for cur := start; ; {
		if _, statErr := os.Stat(filepath.Join(cur, "go.mod")); statErr == nil {
			return cur, nil
		}
		up := filepath.Dir(cur)
		if up == cur {
			// Dir is a fixed point only at the root: nothing left to search.
			return "", fmt.Errorf("could not find repo root from %s", start)
		}
		cur = up
	}
}
// wipeDir removes dir together with everything below it and then recreates it
// (including missing parents) with mode 0o750, leaving an empty directory.
func wipeDir(dir string) error {
	err := os.RemoveAll(dir)
	if err == nil {
		err = os.MkdirAll(dir, 0o750)
	}
	return err
}
func fixtureName(family, name string) string {
parts := []string{"bqutils"}
for seg := range strings.SplitSeq(family, "/") {
if seg != "" {
parts = append(parts, seg)
}
}
parts = append(parts, name)
raw := strings.Join(parts, "_")
return nonAlnum.ReplaceAllString(strings.ToLower(raw), "_")
}
// projectID builds the fixture's project ID from a UDF name: the name is
// lowercased, runs matched by nonAlnum become "-", leading and trailing "-"
// are trimmed, and the result is prefixed with "proj-bqutils-". A name that
// reduces to nothing falls back to the slug "udf".
func projectID(name string) string {
	const prefix = "proj-bqutils-"
	slug := strings.Trim(nonAlnum.ReplaceAllString(strings.ToLower(name), "-"), "-")
	if slug != "" {
		return prefix + slug
	}
	return prefix + "udf"
}
// buildQuery renders the fixture's verification query for udf. UDAFs
// (Kind == "udaf") are delegated to buildUdafQuery. For everything else it
// emits a `cases` CTE with one SELECT per test case, joined by UNION ALL, that
// calls the function on the case's inputs, followed by a final SELECT that
// reports per case_id whether the JSON-encoded actual and expected values match.
func buildQuery(udf manifestUDF) string {
	if udf.Kind == "udaf" {
		return buildUdafQuery(udf)
	}

	selects := make([]string, 0, len(udf.Cases))
	for id, tc := range udf.Cases {
		selects = append(selects, fmt.Sprintf(
			" SELECT %d AS case_id, TO_JSON_STRING(%s(%s)) AS actual, TO_JSON_STRING(%s) AS expected\n",
			id, udf.Name, strings.Join(tc.Inputs, ", "), tc.ExpectedOutput,
		))
	}

	// NULL = NULL is UNKNOWN in SQL, so the final SELECT treats two NULL JSON
	// strings as a match.
	return "WITH cases AS (\n" +
		strings.Join(selects, " UNION ALL\n") +
		")\n" +
		"SELECT case_id, IFNULL(actual = expected, actual IS NULL AND expected IS NULL) AS matches FROM cases ORDER BY case_id\n"
}
// buildUdafQuery renders the verification query for an aggregate function.
// It has the same overall shape as buildQuery's scalar path, but each case's
// SELECT reads FROM a subquery built out of the case's InputRows.
//
// For every entry of InputColumns: if it contains " NOT AGGREGATE", the text
// before that marker (trimmed) is passed to the UDAF as-is; otherwise the
// entry is projected as test_input_N and the alias is passed instead.
func buildUdafQuery(udf manifestUDF) string {
	const notAggregate = " NOT AGGREGATE"

	selects := make([]string, 0, len(udf.Cases))
	for id, tc := range udf.Cases {
		var projections, callArgs []string
		for _, col := range tc.InputColumns {
			if literal, _, marked := strings.Cut(col, notAggregate); marked {
				callArgs = append(callArgs, strings.TrimSpace(literal))
				continue
			}
			// Aliases are numbered by how many aggregate columns precede this one.
			alias := fmt.Sprintf("test_input_%d", len(projections))
			projections = append(projections, fmt.Sprintf("%s AS %s", col, alias))
			callArgs = append(callArgs, alias)
		}

		source := tc.InputRows
		if len(projections) > 0 {
			source = fmt.Sprintf("SELECT %s FROM (%s)", strings.Join(projections, ", "), tc.InputRows)
		}
		selects = append(selects, fmt.Sprintf(
			" SELECT %d AS case_id, TO_JSON_STRING(%s(%s)) AS actual, TO_JSON_STRING(%s) AS expected\n FROM (%s)\n",
			id, udf.Name, strings.Join(callArgs, ", "), tc.ExpectedOutput, source,
		))
	}

	return "WITH cases AS (\n" +
		strings.Join(selects, " UNION ALL\n") +
		")\n" +
		"SELECT case_id, IFNULL(actual = expected, actual IS NULL AND expected IS NULL) AS matches FROM cases ORDER BY case_id\n"
}
// kindLabel returns the display label for a manifest kind: "UDAF" for the
// exact string "udaf" and "UDF" for every other value, including empty.
func kindLabel(kind string) string {
	switch kind {
	case "udaf":
		return "UDAF"
	default:
		return "UDF"
	}
}
// buildFixture assembles the conformance fixture for udf: the trimmed
// CreateSQL is the only setup step, the query comes from buildQuery, and the
// expectation is one ordered row per case with case_id set to the case index
// (as a decimal string) and matches set to true.
func buildFixture(udf manifestUDF) runner.Fixture {
	caseCount := len(udf.Cases)
	expectedRows := make([]map[string]any, 0, caseCount)
	for id := 0; id < caseCount; id++ {
		expectedRows = append(expectedRows, map[string]any{
			"case_id": strconv.Itoa(id),
			"matches": true,
		})
	}

	description := fmt.Sprintf(
		"bigquery-utils %s %s %s (%d cases)",
		udf.Family, kindLabel(udf.Kind), udf.Name, caseCount,
	)
	createStep := runner.SetupStep{SQL: strings.TrimSpace(udf.CreateSQL)}

	return runner.Fixture{
		Name:        fixtureName(udf.Family, udf.Name),
		Description: description,
		Profiles:    []string{runner.ProfileDuckDB},
		ProjectID:   projectID(udf.Name),
		Setup:       []runner.SetupStep{createStep},
		Query:       buildQuery(udf),
		Expected: runner.Expectation{
			Match: runner.MatchOrdered,
			Rows:  expectedRows,
		},
	}
}
// provenanceHeader returns the three-line "#" comment block that writeFixture
// places at the top of each generated YAML file: the upstream revision
// ("unknown" when sha is empty), the upstream SQLX path with the base name of
// the test-case file, and the license / do-not-edit notice.
func provenanceHeader(sha string, udf manifestUDF) string {
	const layout = "# Source: GoogleCloudPlatform/bigquery-utils @ %s\n" +
		"# %s (+ %s)\n" +
		"# License: Apache-2.0. Generated by scripts/sync_bigquery_utils_udfs.sh; do not edit by hand.\n"

	revision := sha
	if revision == "" {
		revision = "unknown"
	}
	return fmt.Sprintf(layout, revision, udf.UpstreamSQLX, filepath.Base(udf.UpstreamTestCases))
}
// marshalFixture encodes f as YAML with a two-space indent and returns the
// bytes. The encoder is closed only after a successful Encode; the first
// error from either step is returned.
func marshalFixture(f runner.Fixture) ([]byte, error) {
	var buf strings.Builder
	encoder := yaml.NewEncoder(&buf)
	encoder.SetIndent(2)

	encodeErr := encoder.Encode(&f)
	if encodeErr == nil {
		encodeErr = encoder.Close()
	}
	if encodeErr != nil {
		return nil, encodeErr
	}
	return []byte(buf.String()), nil
}
// writeFixture generates the YAML fixture for udf and writes it to
// <outRoot>/<Family>/<Name>.yaml, creating the family directory as needed.
//
// Before the final write, the content (provenance header plus encoded
// fixture) is written to a temporary file under repoRoot and loaded back with
// runner.Load, so a fixture the loader rejects is reported as an error rather
// than written out. The temporary file is removed on return.
func writeFixture(repoRoot, outRoot, sha string, udf manifestUDF) error {
	yamlBody, err := marshalFixture(buildFixture(udf))
	if err != nil {
		return err
	}

	dest := filepath.Join(outRoot, udf.Family, udf.Name+".yaml")
	if err := os.MkdirAll(filepath.Dir(dest), 0o750); err != nil {
		return err
	}

	content := append([]byte(provenanceHeader(sha, udf)), yamlBody...)

	// Round-trip through the runner loader so schema drift fails fast.
	scratch, err := os.CreateTemp(repoRoot, ".tmp-genbqutils-*.yaml")
	if err != nil {
		return err
	}
	scratchPath := scratch.Name()
	defer func() {
		_ = os.Remove(scratchPath)
	}()

	// Always close the scratch file; a write error takes precedence over a
	// close error.
	_, writeErr := scratch.Write(content)
	closeErr := scratch.Close()
	if writeErr != nil {
		return writeErr
	}
	if closeErr != nil {
		return closeErr
	}

	if _, loadErr := runner.Load(scratchPath); loadErr != nil {
		return fmt.Errorf("runner.Load: %w", loadErr)
	}
	return os.WriteFile(dest, content, 0o600)
}
// fatal formats a message printf-style, writes it to stderr prefixed with
// "genbqutils: " and terminated by a newline, then exits with status 1.
func fatal(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	fmt.Fprintln(os.Stderr, "genbqutils: "+msg)
	os.Exit(1)
}
// googlesql-corpus drives the vendored GoogleSQL compliance .test subset
// through jobs.query and diffs results with the fixture lane's typed-cell
// comparator.
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"os"
"os/signal"
"syscall"
"github.com/vantaboard/bigquery-emulator/conformance/googlesqlcorpus"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
)
// main runs the CLI and maps its outcome to a process exit status: 2 (after
// printing the error to stderr) when run fails, otherwise run's own exit code.
func main() {
	code, err := run()
	switch {
	case err != nil:
		_, _ = fmt.Fprintln(os.Stderr, "googlesql-corpus:", err)
		os.Exit(2)
	case code != 0:
		os.Exit(code)
	}
}
// cliConfig holds the parsed command-line flags; parseCLI fills it and run
// consumes it. Each field mirrors the flag named in its comment.
type cliConfig struct {
// --corpus: directory of vendored .test files.
corpusDir string
// --manifest: path of the pinned-passing manifest; loaded at startup and
// rewritten in triage mode.
manifestPath string
// --engine-binary: path to emulator_main.
engineBinary string
// --profile: conformance profile name.
profile string
// --gate-pinned: restrict the run to the manifest's pinned set. run ignores
// it when triage is set.
gatePinned bool
// --triage: run all runnable cases and write triage buckets to the manifest.
triage bool
// --output: "text" or "json"; "json" makes writeReport print the report to stdout.
output string
// --output-file: optional path that receives the JSON report.
outputFile string
}
// run executes the googlesql-corpus CLI: it parses flags, loads the pinned
// manifest (a missing manifest file is treated as an empty manifest), runs
// the corpus with SIGINT/SIGTERM wired to context cancellation, and writes the
// report. In triage mode the manifest is rebuilt from the results and saved
// before the report is written.
//
// It returns the report's exit code on success. A help request yields
// (0, nil); any other failure returns a zero code and a non-nil error.
func run() (int, error) {
	cfg, err := parseCLI(os.Args[1:])
	if err != nil {
		if errors.Is(err, flag.ErrHelp) {
			return 0, nil
		}
		return 0, err
	}

	manifest, err := googlesqlcorpus.LoadManifest(cfg.manifestPath)
	// errors.Is (unlike os.IsNotExist) also recognises a not-found error that
	// has been wrapped with additional context, so a missing manifest falls
	// back to an empty one either way.
	if err != nil && !errors.Is(err, os.ErrNotExist) {
		return 0, err
	}
	if manifest == nil {
		manifest = &googlesqlcorpus.Manifest{}
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// The first SIGINT/SIGTERM cancels ctx.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-sigCh
		cancel()
	}()
	defer signal.Stop(sigCh)

	report, err := googlesqlcorpus.Run(ctx, googlesqlcorpus.Options{
		CorpusDir: cfg.corpusDir,
		Manifest:  manifest,
		// Triage mode overrides --gate-pinned.
		GatePinned: cfg.gatePinned && !cfg.triage,
		TriageMode: cfg.triage,
		Harness: runner.HarnessOptions{
			EngineBinary: cfg.engineBinary,
			EngineStdout: os.Stderr,
			EngineStderr: os.Stderr,
		},
		Profile: cfg.profile,
		Out:     os.Stdout,
		Err:     os.Stderr,
	})
	if err != nil {
		return 0, err
	}

	if cfg.triage {
		updateManifestFromTriage(manifest, report)
		if err := googlesqlcorpus.SaveManifest(cfg.manifestPath, manifest); err != nil {
			return 0, err
		}
	}
	if err := writeReport(report, cfg.output, cfg.outputFile); err != nil {
		return 0, err
	}
	return report.ExitCode(), nil
}
// parseCLI parses args into a cliConfig. Parse errors (including
// flag.ErrHelp) are returned unchanged together with a zero cliConfig.
func parseCLI(args []string) (cliConfig, error) {
	fs := flag.NewFlagSet("googlesql-corpus", flag.ContinueOnError)
	var (
		corpusDir    = fs.String("corpus", "conformance/googlesql-corpus/corpus", "directory of vendored .test files")
		manifestPath = fs.String(
			"manifest",
			"conformance/googlesql-corpus/manifest/pinned.json",
			"pinned-passing manifest",
		)
		engineBinary = fs.String("engine-binary", "./bin/emulator_main", "path to emulator_main")
		profile      = fs.String("profile", "duckdb", "conformance profile")
		gatePinned   = fs.Bool("gate-pinned", true, "only run cases listed in manifest pinned set")
		triage       = fs.Bool("triage", false, "run all runnable cases and write triage buckets to manifest")
		output       = fs.String("output", "text", "text or json")
		outputFile   = fs.String("output-file", "", "optional report path")
	)
	if err := fs.Parse(args); err != nil {
		return cliConfig{}, err
	}
	return cliConfig{
		corpusDir:    *corpusDir,
		manifestPath: *manifestPath,
		engineBinary: *engineBinary,
		profile:      *profile,
		gatePinned:   *gatePinned,
		triage:       *triage,
		output:       *output,
		outputFile:   *outputFile,
	}, nil
}
// writeReport emits the report as indented JSON. It writes to outputFile
// (with a trailing newline, mode 0o600) when that is non-empty, and prints to
// stdout when output is "json". When neither applies it does nothing.
func writeReport(report *googlesqlcorpus.Report, output, outputFile string) error {
	wantJSON := output == "json"
	wantFile := outputFile != ""
	if !wantJSON && !wantFile {
		return nil
	}

	encoded, err := json.MarshalIndent(report, "", " ")
	if err != nil {
		return err
	}
	if wantFile {
		//nolint:gosec // report path is CLI-controlled
		if err := os.WriteFile(outputFile, append(encoded, '\n'), 0o600); err != nil {
			return err
		}
	}
	if wantJSON {
		fmt.Println(string(encoded))
	}
	return nil
}
// updateManifestFromTriage rewrites m from a triage run's results. The pinned
// list is rebuilt from scratch out of the passing results, in report order.
// Every result also gets a triage entry keyed by its ID: passing results are
// bucketed as BucketPinnedPass, all others keep the bucket and message the
// run assigned. Triage entries for IDs absent from the report are left as-is.
func updateManifestFromTriage(m *googlesqlcorpus.Manifest, report *googlesqlcorpus.Report) {
	if m.Triage == nil {
		m.Triage = make(map[string]googlesqlcorpus.TriageEntry)
	}
	m.Pinned = nil
	for _, res := range report.Results {
		if res.Status == string(runner.StatusPass) {
			m.Pinned = append(m.Pinned, res.ID)
			m.Triage[res.ID] = googlesqlcorpus.TriageEntry{Bucket: googlesqlcorpus.BucketPinnedPass}
			continue
		}
		// Skipped and non-passing results are recorded identically.
		m.Triage[res.ID] = googlesqlcorpus.TriageEntry{Bucket: res.Bucket, Message: res.Message}
	}
}
// Binary routing-matrix walks every fixture under
// `conformance/fixtures/` (excluding the leading-underscore
// quarantine directories) and emits a Markdown table of `Shape |
// Route | Strict` rows. The output is a non-blocking CI artifact
// surfaced by `task conformance:routing-matrix` (see
// `docs/ENGINE_POLICY.md`) and used by
// reviewers to spot when a fixture family's actual route does not
// match its directory's aspirational label.
//
// The walker reads YAML only -- it does NOT talk to a live
// emulator. The fixture's `expected.route` is the source of
// truth for the matrix column; the runner enforces that the
// engine actually emits that route at execution time.
//
// Determinism: rows are sorted by fixture path so two consecutive
// invocations produce byte-identical output (pinned by
// `routing_matrix_test.go::TestRoutingMatrixIsReproducible`).
package main
import (
"flag"
"fmt"
"io"
"os"
"path/filepath"
"slices"
"sort"
"strings"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
)
// main runs the routing-matrix CLI against the process arguments and standard
// streams, prints any error to stderr, and exits with the code run returned.
func main() {
	exitCode, runErr := run(os.Args[1:], os.Stdout, os.Stderr)
	if runErr != nil {
		fmt.Fprintln(os.Stderr, "routing-matrix:", runErr)
	}
	os.Exit(exitCode)
}
// run implements the routing-matrix CLI. It parses args (flag diagnostics go
// to stderr), builds the Markdown matrix for --fixtures, writes it to stdout,
// and, when --output-file is set, also writes the same bytes to that file.
//
// Return values are the process exit code and an error for main to print:
//
//	0, nil   success, or the user asked for help (-h / -help)
//	2, err   invalid flags
//	1, err   the matrix could not be built or written
func run(args []string, stdout, stderr io.Writer) (int, error) {
	fs := flag.NewFlagSet("routing-matrix", flag.ContinueOnError)
	fs.SetOutput(stderr)
	fixturesDir := fs.String("fixtures", "conformance/fixtures",
		"path to the fixtures directory (or single fixture file)")
	outputFile := fs.String("output-file", "",
		"if set, write the Markdown table to this path (truncate). "+
			"Stdout still receives the same bytes for piping.")
	if err := fs.Parse(args); err != nil {
		// Parse returns the flag.ErrHelp sentinel itself after printing usage
		// for -h/-help; that is a successful outcome, not a usage error.
		if err == flag.ErrHelp {
			return 0, nil
		}
		return 2, err
	}

	table, err := buildMatrix(*fixturesDir)
	if err != nil {
		return 1, err
	}
	if _, err := io.WriteString(stdout, table); err != nil {
		return 1, err
	}
	if *outputFile != "" {
		// 0o600 keeps gosec G306 quiet -- the matrix is a
		// reproducible function of the public fixture YAMLs and
		// has no secret content, but the linter wants a strict
		// mode and the CI artifact uploader (or local
		// inspection) does not rely on group-readable
		// permissions on the on-disk copy.
		if err := os.WriteFile(*outputFile, []byte(table), 0o600); err != nil {
			return 1, fmt.Errorf("write %s: %w", *outputFile, err)
		}
	}
	return 0, nil
}
// matrixRow is the rendered shape of a single fixture in the
// matrix output. Kept private to the rendering helpers below.
type matrixRow struct {
// shape is the fixture path relative to the fixtures root, slash-separated
// and with the file extension removed; rows are sorted by it.
shape string
// route is the fixture's expected route, or its route allowlist rendered as
// "[a, b]", or "(unassigned)" when the fixture declares neither.
route string
// strict is the value of Expected.RouteStrictDefault() for the fixture.
strict bool
}
// buildMatrix loads the fixtures under dir via runner.LoadDir and returns the
// rendered Markdown matrix, with rows sorted by fixture path. The table's
// third column carries each fixture's route-strictness so that
// `route_strict: false` rows stand out to a reviewer. A load failure is
// returned unchanged.
func buildMatrix(dir string) (string, error) {
	loaded, err := runner.LoadDir(dir)
	if err != nil {
		return "", err
	}
	return renderMatrix(collectMatrixRows(loaded, dir)), nil
}
// collectMatrixRows turns loaded fixtures into matrix rows sorted by shape.
// All path normalization happens here: the shape is the fixture path relative
// to dir (or the raw path when it cannot be made relative), with the
// extension stripped and separators converted to forward slashes, so the
// renderer only deals in plain strings.
func collectMatrixRows(fixtures []*runner.Fixture, dir string) []matrixRow {
	rows := make([]matrixRow, len(fixtures))
	for i, fx := range fixtures {
		relPath, relErr := filepath.Rel(dir, fx.Path)
		if relErr != nil {
			relPath = fx.Path
		}

		// Prefer the single expected route, then the allowlist, then a
		// placeholder for fixtures that declare neither.
		route := fx.Expected.Route
		switch {
		case route != "":
		case len(fx.Expected.RouteAllowlist) > 0:
			route = "[" + strings.Join(fx.Expected.RouteAllowlist, ", ") + "]"
		default:
			route = "(unassigned)"
		}

		rows[i] = matrixRow{
			shape:  filepath.ToSlash(strings.TrimSuffix(relPath, filepath.Ext(relPath))),
			route:  route,
			strict: fx.Expected.RouteStrictDefault(),
		}
	}
	sort.SliceStable(rows, func(a, b int) bool { return rows[a].shape < rows[b].shape })
	return rows
}
// renderMatrix renders the full Markdown document for rows: the fixed header,
// the per-route totals, then the per-fixture table. It is separate from
// buildMatrix so tests can render rows without loading fixtures.
func renderMatrix(rows []matrixRow) string {
	perRoute := make(map[string]int)
	for i := range rows {
		perRoute[rows[i].route]++
	}

	var out strings.Builder
	out.WriteString(matrixHeader)
	writeSummary(&out, perRoute, len(rows))
	writePerFixture(&out, rows)
	return out.String()
}
// matrixHeader is the fixed Markdown preamble that renderMatrix writes before
// the totals and per-fixture tables. It explains what the Route and Strict
// columns mean.
const matrixHeader = `# Conformance routing matrix
Generated by ` + "`task conformance:routing-matrix`" + `. Each row pins the route the coordinator's ` +
"`RouteClassifier`" + ` chooses for the fixture's ` + "`query:`" + ` step (the ` + "`setup:`" +
` steps run on their own routes).
Strict=` + "`false`" + ` flags documentation-only rows: the engine returns before ` +
"`EmitTrailers`" + ` fires (typically error-path fixtures), so the runner skips the assertion at execution time. The route value stays in the matrix as planning-time documentation.
`
// writeSummary appends the "Per-route totals" table to b. counts maps a route
// label to the number of fixtures carrying it and total is the overall
// fixture count.
//
// Canonical routes come first, in runner.KnownRouteNames order, and only when
// their count is non-zero. Every other label follows in sorted order, and a
// final "total" row closes the table. Sorting the non-canonical labels keeps
// the output byte-identical between runs; ranging over the map directly would
// emit them in an unspecified order.
func writeSummary(b *strings.Builder, counts map[string]int, total int) {
	b.WriteString("## Per-route totals\n\n| Route | Count |\n|---|---|\n")
	for _, name := range runner.KnownRouteNames() {
		if n := counts[name]; n > 0 {
			fmt.Fprintf(b, "| `%s` | %d |\n", name, n)
		}
	}
	// Print any non-canonical buckets (allowlist-rendered rows,
	// `(unassigned)`) AFTER the canonical block so a stray label
	// is visible to a reviewer.
	extras := make([]string, 0, len(counts))
	for label := range counts {
		if !isCanonicalRoute(label) {
			extras = append(extras, label)
		}
	}
	sort.Strings(extras)
	for _, label := range extras {
		fmt.Fprintf(b, "| `%s` | %d |\n", label, counts[label])
	}
	fmt.Fprintf(b, "| **total** | %d |\n", total)
}
// writePerFixture appends the "Per-fixture" table to b: one Markdown row per
// matrix row, in the order given, with the strict flag rendered as the word
// "true" or "false".
func writePerFixture(b *strings.Builder, rows []matrixRow) {
	b.WriteString("\n## Per-fixture\n\n| Shape | Route | Strict |\n|---|---|---|\n")
	for i := range rows {
		row := rows[i]
		label := "false"
		if row.strict {
			label = "true"
		}
		fmt.Fprintf(b, "| `%s` | `%s` | %s |\n", row.shape, row.route, label)
	}
}
// isCanonicalRoute reports whether s is one of the names returned by
// runner.KnownRouteNames.
func isCanonicalRoute(s string) bool {
	known := runner.KnownRouteNames()
	return slices.Index(known, s) >= 0
}
// Binary runner is the conformance harness's CLI. It loads YAML
// fixtures, iterates the engine x storage profile matrix, and emits
// PASS / FAIL records (or a JSON report). See `conformance/README.md`
// for the fixture schema and worked examples; this file is just flag
// parsing and exit-code wiring.
package main
import (
"context"
"errors"
"flag"
"fmt"
"io"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
)
// stringSliceFlag is a flag.Value that collects every occurrence of a
// repeatable flag (for example `--profile duckdb`) into one slice rather than
// keeping only the last value.
type stringSliceFlag []string

// String renders the collected values joined by commas.
func (s *stringSliceFlag) String() string {
	return strings.Join([]string(*s), ",")
}

// Set appends v to the collected values. Empty values are ignored. It never
// returns an error.
func (s *stringSliceFlag) Set(v string) error {
	if v != "" {
		*s = append(*s, v)
	}
	return nil
}
// main runs the CLI and maps its outcome to a process exit status: 2 (after
// printing the error to stderr) when run fails, otherwise run's own exit code.
func main() {
	code, err := run()
	switch {
	case err != nil:
		_, _ = fmt.Fprintln(os.Stderr, "runner:", err)
		os.Exit(2)
	case code != 0:
		os.Exit(code)
	}
}
// runnerConfig is the parsed view of the CLI flags that `run` hands
// off to the conformance runner. Pulled out of run() so the flag-
// parsing block (and the engine-binary / --connect mutual-exclusion
// rule) can live in its own helper without smuggling state through
// closures.
type runnerConfig struct {
// Fixtures is the --fixtures directory or file.
Fixtures string
// EngineBinary is the --engine-binary path; parseFlags clears it when
// --connect is given.
EngineBinary string
// Connect is the --connect HOST:PORT of an already-running engine.
Connect string
// UpdateBaselines mirrors --update-baselines.
UpdateBaselines bool
// Output is the --output format ("text" or "json").
Output string
// OutputFile is the --output-file path; empty means stdout only.
OutputFile string
// Profiles holds every --profile value in the order given; nil when the
// flag was not passed.
Profiles []string
HelpExit bool // user passed --help; main should exit 0.
}
// run is the body of the conformance runner binary: it parses flags, sets up
// the optional --output-file tee, installs SIGINT/SIGTERM handling, and calls
// runner.Run. The returned int is the exit code for main to hand to os.Exit
// (returning it, rather than exiting here, lets this function's defers run);
// the error is any failure main should print.
func run() (int, error) {
	cfg, err := parseFlags(os.Args[1:])
	switch {
	case errors.Is(err, flag.ErrHelp):
		return 0, nil
	case err != nil:
		return 0, err
	case cfg.HelpExit:
		return 0, nil
	}

	sink, finish, err := setupOutputFile(cfg.OutputFile)
	if err != nil {
		return 0, err
	}
	if finish != nil {
		defer finish()
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// The first SIGINT/SIGTERM cancels ctx, which is passed to runner.Run.
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-signals
		cancel()
	}()
	defer signal.Stop(signals)

	opts := runner.Options{
		FixturesPath: cfg.Fixtures,
		Harness: runner.HarnessOptions{
			EngineBinary:   cfg.EngineBinary,
			ConnectAddress: cfg.Connect,
			EngineStdout:   os.Stderr,
			EngineStderr:   os.Stderr,
		},
		Profiles:        cfg.Profiles,
		UpdateBaselines: cfg.UpdateBaselines,
		Output:          cfg.Output,
		Out:             sink,
		Err:             os.Stderr,
	}
	report, err := runner.Run(ctx, opts)
	if err != nil {
		return 0, err
	}
	return report.ExitCode(), nil
}
// parseFlags defines the runner's flags, parses args, and returns the
// resulting runnerConfig.
//
// An explicit --help prints the usage banner and returns a config with only
// HelpExit set. Parse errors (including flag.ErrHelp) are returned unchanged.
// When --connect is given, a non-default --engine-binary is rejected as
// mutually exclusive and the default engine binary is cleared.
func parseFlags(args []string) (runnerConfig, error) {
	const defaultEngineBinary = "./bin/emulator_main"

	fs := flag.NewFlagSet("runner", flag.ContinueOnError)
	fs.SetOutput(os.Stderr)
	fs.Usage = func() { writeUsage(fs) }

	var (
		cfg      runnerConfig
		profiles stringSliceFlag
		showHelp bool
	)
	fs.StringVar(&cfg.Fixtures, "fixtures", "conformance/fixtures", "directory or file containing fixture YAML")
	fs.StringVar(
		&cfg.EngineBinary,
		"engine-binary",
		defaultEngineBinary,
		"path to emulator_main; mutually exclusive with --connect",
	)
	fs.StringVar(
		&cfg.Connect,
		"connect",
		"",
		"HOST:PORT of an already-running engine to dial instead of spawning emulator_main",
	)
	fs.BoolVar(
		&cfg.UpdateBaselines,
		"update-baselines",
		false,
		"overwrite each fixture's expected: block with the captured response (bootstrap mode)",
	)
	fs.StringVar(&cfg.Output, "output", "text", "output format: text or json")
	fs.StringVar(
		&cfg.OutputFile,
		"output-file",
		"",
		"if non-empty, write the rendered report to this file (atomic write) in addition to stdout",
	)
	fs.BoolVar(&showHelp, "help", false, "print usage and exit")
	fs.Var(&profiles, "profile", "restrict the matrix to one profile (repeatable). Default: all known profiles")

	if err := fs.Parse(args); err != nil {
		return runnerConfig{}, err
	}
	if showHelp {
		fs.Usage()
		return runnerConfig{HelpExit: true}, nil
	}

	if cfg.Connect != "" && cfg.EngineBinary != "" {
		// The flag default cannot be told apart from "not passed", so only a
		// non-default --engine-binary is treated as user-supplied.
		if cfg.EngineBinary != defaultEngineBinary {
			return runnerConfig{}, errors.New("--engine-binary and --connect are mutually exclusive")
		}
		cfg.EngineBinary = ""
	}
	cfg.Profiles = []string(profiles)
	return cfg, nil
}
// writeUsage emits the runner's --help banner to fs.Output(): an
// introduction, the flag defaults from fs.PrintDefaults, then the profile
// list and exit-code table. Pulled out of parseFlags so the flag-parsing
// function stays under the funlen limit; the raw-string prose is the bulk of
// its line count. Write errors are deliberately ignored.
func writeUsage(fs *flag.FlagSet) {
_, _ = fmt.Fprintln(fs.Output(), `Usage: runner [flags]
Run the BigQuery emulator conformance fixtures and diff against
expected rows or errors. By default the runner spawns its own
emulator_main subprocess per fixture x profile; --connect HOST:PORT
reaches an already-running gateway (used by CI).
Flags:`)
fs.PrintDefaults()
_, _ = fmt.Fprintln(fs.Output(), `
Profiles:
duckdb duckdb engine + duckdb storage (only profile today)
Exit codes:
0 every fixture x profile PASSed
1 at least one fixture x profile FAILed
2 runner-internal error (bad YAML, can't start engine, etc)
See conformance/README.md for the fixture schema and JSON output
shape.`)
}
// setupOutputFile honors --output-file. With an empty path it returns
// os.Stdout and a nil cleanup. Otherwise it creates a temporary file in the
// same directory as path and returns an io.MultiWriter that tees the runner's
// output into both stdout and that file, plus a cleanup closure the caller
// must defer: it closes the temp file and renames it onto path.
//
// The rename happens regardless of whether the runner returned an error or
// reported a non-zero exit code (fixture mismatch): the artifact is still the
// most useful diagnostic the workflow has on hand. Close and rename failures
// are reported on stderr rather than returned, and a failed rename also
// removes the temp file. Only a CreateTemp failure (out of disk, perm denied)
// short-circuits before any data lands.
func setupOutputFile(path string) (io.Writer, func(), error) {
	if path == "" {
		return os.Stdout, nil, nil
	}
	// filepath.Dir never returns "" (a bare file name yields "."), so its
	// result can be handed to CreateTemp directly.
	tmp, err := os.CreateTemp(filepath.Dir(path), ".conformance-runner-*.tmp")
	if err != nil {
		return nil, nil, fmt.Errorf("create --output-file tmp: %w", err)
	}
	tmpName := tmp.Name()
	cleanup := func() {
		if err := tmp.Close(); err != nil {
			// Report the failure but still attempt the rename below: a
			// possibly truncated report is more useful than none.
			_, _ = fmt.Fprintln(os.Stderr, "runner: close --output-file tmp:", err)
		}
		if err := os.Rename(tmpName, path); err != nil {
			_, _ = fmt.Fprintln(os.Stderr, "runner: rename --output-file:", err)
			_ = os.Remove(tmpName)
		}
	}
	return io.MultiWriter(os.Stdout, tmp), cleanup, nil
}
// session runs stateful multi-step conformance sessions against one long-lived
// emulator process per session.
package main
import (
"context"
"errors"
"flag"
"fmt"
"io"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
)
// sessionConfig is the parsed form of the session CLI's flags; parseFlags
// fills it and run consumes it.
type sessionConfig struct {
// Sessions is the --sessions directory or single YAML file.
Sessions string
// EngineBinary is the --engine-binary path; parseFlags clears it when
// --connect is given.
EngineBinary string
// Connect is the --connect HOST:PORT of a running gateway.
Connect string
// Profile is the --profile value; when blank, run passes no profile list.
Profile string
// Output is the --output format ("text" or "json").
Output string
// OutputFile is the --output-file path; empty means stdout only.
OutputFile string
// IncludeSelfTest mirrors --include-selftest.
IncludeSelfTest bool
}
// main runs the CLI and maps its outcome to a process exit status: 2 (after
// printing the error to stderr) when run fails, otherwise run's own exit code.
func main() {
	code, err := run()
	switch {
	case err != nil:
		_, _ = fmt.Fprintln(os.Stderr, "session:", err)
		os.Exit(2)
	case code != 0:
		os.Exit(code)
	}
}
// run is the session CLI's entry point behind main. It parses flags,
// optionally tees the report into --output-file, cancels the run's context on
// SIGINT/SIGTERM, and delegates to runner.RunSessions.
//
// On success it returns the report's exit code. A help request yields
// (0, nil); every other failure returns a zero code and a non-nil error.
func run() (int, error) {
	cfg, err := parseFlags(os.Args[1:])
	switch {
	case errors.Is(err, flag.ErrHelp):
		return 0, nil
	case err != nil:
		return 0, err
	}

	sink, finish, err := setupOutputFile(cfg.OutputFile)
	if err != nil {
		return 0, err
	}
	if finish != nil {
		defer finish()
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// The first SIGINT/SIGTERM cancels ctx.
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, os.Interrupt, syscall.SIGTERM)
	go func() {
		<-signals
		cancel()
	}()
	defer signal.Stop(signals)

	// A blank --profile leaves the list nil; otherwise the value is passed
	// through exactly as given.
	var profiles []string
	if strings.TrimSpace(cfg.Profile) != "" {
		profiles = []string{cfg.Profile}
	}

	opts := runner.SessionOptions{
		SessionsPath:    cfg.Sessions,
		IncludeSelfTest: cfg.IncludeSelfTest,
		Harness: runner.HarnessOptions{
			EngineBinary:   cfg.EngineBinary,
			ConnectAddress: cfg.Connect,
			EngineStdout:   os.Stderr,
			EngineStderr:   os.Stderr,
		},
		Profiles: profiles,
		Output:   cfg.Output,
		Out:      sink,
		Err:      os.Stderr,
	}
	report, err := runner.RunSessions(ctx, opts)
	if err != nil {
		return 0, err
	}
	return report.ExitCode(), nil
}
// parseFlags defines the session CLI's flags, parses args, and returns the
// resulting sessionConfig. Parse errors (including flag.ErrHelp) are returned
// unchanged. When --connect is given, a non-default --engine-binary is
// rejected as mutually exclusive and the default engine binary is cleared.
func parseFlags(args []string) (sessionConfig, error) {
	const defaultEngineBinary = "./bin/emulator_main"

	fs := flag.NewFlagSet("session", flag.ContinueOnError)
	fs.SetOutput(os.Stderr)
	var (
		sessions        = fs.String("sessions", runner.DefaultSessionsDir, "session directory or single YAML")
		engineBinary    = fs.String("engine-binary", defaultEngineBinary, "path to emulator_main")
		connect         = fs.String("connect", "", "HOST:PORT of a running gateway")
		profile         = fs.String("profile", "", "runtime profile (default: all known profiles)")
		output          = fs.String("output", "text", "output format: text or json")
		outputFile      = fs.String("output-file", "", "tee report to this file (atomic write)")
		includeSelfTest = fs.Bool("include-selftest", false, "run _-prefixed self-test session files")
	)
	if err := fs.Parse(args); err != nil {
		return sessionConfig{}, err
	}

	cfg := sessionConfig{
		Sessions:        *sessions,
		EngineBinary:    *engineBinary,
		Connect:         *connect,
		Profile:         *profile,
		Output:          *output,
		OutputFile:      *outputFile,
		IncludeSelfTest: *includeSelfTest,
	}
	if cfg.Connect != "" {
		// The flag default cannot be told apart from "not passed", so only a
		// non-empty, non-default --engine-binary counts as a conflict.
		if cfg.EngineBinary != "" && cfg.EngineBinary != defaultEngineBinary {
			return sessionConfig{}, errors.New("--engine-binary and --connect are mutually exclusive")
		}
		cfg.EngineBinary = ""
	}
	return cfg, nil
}
// setupOutputFile prepares the destination for the rendered report.
//
// With an empty path it returns os.Stdout and a nil cleanup. Otherwise it
// creates a temporary file in the same directory as path and returns a writer
// that tees into both os.Stdout and that file, plus a cleanup func the caller
// must invoke once writing is done. cleanup closes the temp file and renames
// it onto path; close and rename failures are reported on stderr rather than
// returned, and a failed rename also removes the temp file.
//
// The only error returned is a failure to create the temp file.
func setupOutputFile(path string) (io.Writer, func(), error) {
	if path == "" {
		return os.Stdout, nil, nil
	}
	// filepath.Dir never returns "" (a bare file name yields "."), so its
	// result can be handed to CreateTemp directly.
	tmp, err := os.CreateTemp(filepath.Dir(path), ".session-*.tmp")
	if err != nil {
		return nil, nil, fmt.Errorf("create --output-file tmp: %w", err)
	}
	tmpName := tmp.Name()
	cleanup := func() {
		if err := tmp.Close(); err != nil {
			// Report the failure but still attempt the rename below: a
			// possibly truncated report is more useful than none.
			_, _ = fmt.Fprintln(os.Stderr, "session: close --output-file tmp:", err)
		}
		if err := os.Rename(tmpName, path); err != nil {
			_, _ = fmt.Fprintln(os.Stderr, "session: rename --output-file:", err)
			_ = os.Remove(tmpName)
		}
	}
	return io.MultiWriter(os.Stdout, tmp), cleanup, nil
}
package differential
import (
"strings"
)
// DivergenceKind classifies emulator vs oracle outcomes for the differential lane.
type DivergenceKind string
// The buckets below are listed with the condition under which
// ClassifyDivergence returns them; crash and feature-gap detection take
// precedence over the success/diff comparison.
const (
// KindMatch: no crash or feature-gap signal, matching success flags, empty diff.
KindMatch DivergenceKind = "match"
// KindFeatureGap: the combined message text contains a feature-gap needle.
KindFeatureGap DivergenceKind = "feature_gap"
// KindSemanticDivergence: success flags agree but the diff is non-empty.
KindSemanticDivergence DivergenceKind = "semantic_divergence"
// KindErrorDivergence: oracle and emulator disagree on success.
KindErrorDivergence DivergenceKind = "error_divergence"
// KindCrash: the combined message text contains a crash needle.
KindCrash DivergenceKind = "crash"
)
// ClassifyInput carries the signals ClassifyDivergence needs.
type ClassifyInput struct {
// OracleSuccess and EmulatorSuccess are compared with each other; a mismatch
// classifies as an error divergence.
OracleSuccess bool
EmulatorSuccess bool
// EmulatorStatus is not read by ClassifyDivergence.
EmulatorStatus int
// EmulatorBody, Diff and RunnerMessage are concatenated and lowercased to
// form the text searched for crash and feature-gap needles. A non-empty Diff
// additionally marks a semantic divergence.
EmulatorBody []byte
Diff string
RunnerMessage string
}
// ClassifyDivergence maps a replay outcome to a divergence bucket. The runner
// message, diff and emulator body are combined and lowercased, then the first
// matching rule wins:
//
//  1. crash needle present          -> KindCrash
//  2. feature-gap needle present    -> KindFeatureGap
//  3. success flags differ          -> KindErrorDivergence
//  4. non-empty diff                -> KindSemanticDivergence
//  5. otherwise                     -> KindMatch
func ClassifyDivergence(in ClassifyInput) DivergenceKind {
	haystack := strings.ToLower(in.RunnerMessage + " " + in.Diff + " " + string(in.EmulatorBody))
	switch {
	case isCrashSignal(haystack):
		return KindCrash
	case isFeatureGap(haystack):
		return KindFeatureGap
	case in.OracleSuccess != in.EmulatorSuccess:
		return KindErrorDivergence
	case in.Diff != "":
		return KindSemanticDivergence
	default:
		return KindMatch
	}
}
// isFeatureGap reports whether msg contains any of the substrings that mark
// an unimplemented feature. Matching is case-sensitive; ClassifyDivergence
// lowercases the text before calling.
func isFeatureGap(msg string) bool {
	for _, marker := range [...]string{
		"unimplemented",
		"not implemented",
		"not yet implemented",
		"setoperationscan op is not union all",
		"withrefscan without active withscan bindings",
	} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
// isCrashSignal reports whether msg contains any of the substrings that
// indicate the engine process died or became unreachable. Matching is
// case-sensitive; ClassifyDivergence lowercases the text before calling.
func isCrashSignal(msg string) bool {
	for _, marker := range [...]string{
		"signal: killed",
		"signal: aborted",
		"engine process exited",
		"lost connection",
		"connection refused",
		"broken pipe",
		"segfault",
		"core dumped",
	} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
// Package differential replays a corpus of SQL cases against the local
// emulator and diffs results against committed production-BigQuery oracles.
package differential
import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
"gopkg.in/yaml.v3"
)
// CorpusCase is one differential-lane YAML under conformance/differential/corpus/.
// It reuses the fixture setup/query schema but pins expectations in a separate
// oracle JSON file referenced by OracleRef.
//
// Decoding rejects unknown YAML keys, and normalize enforces the required
// fields and fills the defaults noted below.
type CorpusCase struct {
// Name is required (non-blank).
Name string `yaml:"name"`
Description string `yaml:"description,omitempty"`
// Profiles defaults to the DuckDB profile when empty; every entry must be a
// profile known to the runner.
Profiles []string `yaml:"profiles,omitempty"`
// ProjectID defaults to "proj-diff-" plus a sanitized form of Name.
ProjectID string `yaml:"project_id,omitempty"`
DefaultDataset string `yaml:"default_dataset,omitempty"`
// OracleRef is required (non-blank).
OracleRef string `yaml:"oracle_ref"`
OracleSource string `yaml:"oracle_source,omitempty"`
Match runner.MatchMode `yaml:"match,omitempty"`
KnownFailing bool `yaml:"known_failing,omitempty"`
QueryParameters []QueryParameterYAML `yaml:"query_parameters,omitempty"`
// Setup steps are each validated with ValidateExported during normalize.
Setup []runner.SetupStep `yaml:"setup,omitempty"`
// Query is required (non-blank).
Query string `yaml:"query"`
// Path is the file the case was loaded from. It is set by the loader and
// never read from or written to YAML.
Path string `yaml:"-"`
}
// QueryParameterYAML is the corpus-side spelling of a named query parameter.
// Only Name and Type are always serialized; the remaining fields are optional
// in the YAML. NOTE(review): presumably Value carries scalars, the Array*
// fields carry ARRAY parameters and the Struct* fields carry STRUCT
// parameters -- confirm against the code that consumes this type.
type QueryParameterYAML struct {
Name string `yaml:"name"`
Type string `yaml:"type"`
Value string `yaml:"value,omitempty"`
ArrayElementType string `yaml:"array_element_type,omitempty"`
ArrayValues []string `yaml:"array_values,omitempty"`
StructFields []QueryParameterStructFieldYAML `yaml:"struct_fields,omitempty"`
StructValues map[string]string `yaml:"struct_values,omitempty"`
}
// QueryParameterStructFieldYAML names one STRUCT parameter field: its name
// and its type. It is the element type of QueryParameterYAML.StructFields.
type QueryParameterStructFieldYAML struct {
Name string `yaml:"name"`
Type string `yaml:"type"`
}
// DefaultCorpusDir is the committed corpus root, given as a relative path.
// The differential CLI uses it as the default for --corpus.
const DefaultCorpusDir = "conformance/differential/corpus"
// DefaultOracleDir is the committed oracle JSON root, given as a relative
// path. The differential CLI uses it as the default for --oracle-dir.
const DefaultOracleDir = "conformance/differential/oracle"
// LoadCorpus reads the corpus YAML file at path and returns the parsed and
// validated case. A read failure is wrapped as "read <path>: ..."; parse and
// validation failures come from loadCorpusBytes.
func LoadCorpus(path string) (*CorpusCase, error) {
	raw, readErr := os.ReadFile(path) //nolint:gosec // path is CLI-controlled
	if readErr != nil {
		return nil, fmt.Errorf("read %s: %w", path, readErr)
	}
	return loadCorpusBytes(raw, path)
}
// loadCorpusBytes decodes data as a single CorpusCase, rejecting YAML keys
// that do not correspond to a struct field, records path on the case, and
// runs normalize. path is used for error messages and for CorpusCase.Path.
func loadCorpusBytes(data []byte, path string) (*CorpusCase, error) {
	decoder := yaml.NewDecoder(strings.NewReader(string(data)))
	decoder.KnownFields(true)

	parsed := new(CorpusCase)
	if err := decoder.Decode(parsed); err != nil {
		return nil, fmt.Errorf("parse %s: %w", path, err)
	}
	parsed.Path = path
	if err := parsed.normalize(); err != nil {
		return nil, fmt.Errorf("validate %s: %w", path, err)
	}
	return parsed, nil
}
// LoadCorpusDir loads every corpus case under pathOrDir. When pathOrDir is a
// regular file it is loaded directly; otherwise the directory is walked
// recursively and every .yaml/.yml file (extension matched
// case-insensitively) is loaded.
//
// Files and directories whose basename starts with "_" are skipped unless
// includeSelfTest is true (unit/self-test lane). The root directory itself is
// never skipped, whatever its name. The result is sorted by Path. The first
// stat, walk or load error aborts the whole operation.
func LoadCorpusDir(pathOrDir string, includeSelfTest bool) ([]*CorpusCase, error) {
	info, err := os.Stat(pathOrDir)
	if err != nil {
		return nil, fmt.Errorf("stat %s: %w", pathOrDir, err)
	}
	if !info.IsDir() {
		c, err := LoadCorpus(pathOrDir)
		if err != nil {
			return nil, err
		}
		return []*CorpusCase{c}, nil
	}

	var cases []*CorpusCase
	walkErr := filepath.Walk(pathOrDir, func(p string, fi os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		selfTest := strings.HasPrefix(filepath.Base(p), "_")
		if fi.IsDir() {
			// Walk reports the root using exactly the path it was given, so a
			// full-path comparison identifies it. Comparing basenames would
			// also exempt a nested directory that merely shares the root's
			// name.
			if p != pathOrDir && selfTest && !includeSelfTest {
				return filepath.SkipDir
			}
			return nil
		}
		ext := strings.ToLower(filepath.Ext(p))
		if ext != ".yaml" && ext != ".yml" {
			return nil
		}
		if selfTest && !includeSelfTest {
			return nil
		}
		c, err := LoadCorpus(p)
		if err != nil {
			return err
		}
		cases = append(cases, c)
		return nil
	})
	if walkErr != nil {
		return nil, walkErr
	}
	sort.Slice(cases, func(i, j int) bool { return cases[i].Path < cases[j].Path })
	return cases, nil
}
// normalize validates the decoded case and fills in defaults. name, query and
// oracle_ref must be non-blank; ProjectID defaults to "proj-diff-" plus the
// sanitized name; Profiles defaults to the DuckDB profile. Every listed
// profile must be known to the runner and every setup step must validate.
func (c *CorpusCase) normalize() error {
	required := []struct {
		value   string
		message string
	}{
		{c.Name, "name is required"},
		{c.Query, "query is required"},
		{c.OracleRef, "oracle_ref is required"},
	}
	for _, field := range required {
		if strings.TrimSpace(field.value) == "" {
			return errors.New(field.message)
		}
	}

	if c.ProjectID == "" {
		c.ProjectID = "proj-diff-" + sanitizeID(c.Name)
	}
	if len(c.Profiles) == 0 {
		c.Profiles = []string{runner.ProfileDuckDB}
	}

	registered := runner.KnownProfiles()
	allowed := make(map[string]bool, len(registered))
	for _, profile := range registered {
		allowed[profile.Name] = true
	}
	for _, name := range c.Profiles {
		if !allowed[name] {
			return fmt.Errorf("unknown profile %q", name)
		}
	}

	for idx, step := range c.Setup {
		if stepErr := step.ValidateExported(); stepErr != nil {
			return fmt.Errorf("setup[%d]: %w", idx, stepErr)
		}
	}
	return nil
}
// sanitizeID lower-cases s and replaces every rune outside [a-z0-9] with "-",
// so the result contains only lowercase ASCII letters, digits, and hyphens.
func sanitizeID(s string) string {
	keepOrDash := func(r rune) rune {
		if ('a' <= r && r <= 'z') || ('0' <= r && r <= '9') {
			return r
		}
		// Hyphens map to themselves; everything else collapses to a hyphen.
		return '-'
	}
	return strings.Map(keepOrDash, strings.ToLower(s))
}
package differential
import (
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// OracleError is the recorded production BigQuery error envelope.
// ExpectationFromOracle copies both fields into the runner's expected error.
type OracleError struct {
// Code is the recorded error code; zero means "not recorded".
// NOTE(review): presumably the HTTP status of the failed call — confirm.
Code int `json:"code"`
// Message is the recorded error text, matched as a substring expectation.
Message string `json:"message"`
}
// Oracle is a committed production-BigQuery capture for one corpus case.
// Rows/schema mirror bqtypes.QueryResponse so the fixture comparator is reused.
type Oracle struct {
// CapturedAt is required on load; WriteOracle fills it with the current UTC
// time in RFC 3339 form when it is empty.
CapturedAt string `json:"captured_at"`
// Project is required on load.
Project string `json:"project"`
// JobID is copied from the job reference when one is present.
JobID string `json:"job_id,omitempty"`
// OracleSource is a free-form provenance label.
OracleSource string `json:"oracle_source,omitempty"`
// Match, when set, overrides the corpus case's match mode.
Match runner.MatchMode `json:"match,omitempty"`
// Success selects which half is validated: Schema for true, Error for false.
Success bool `json:"success"`
// Schema must carry at least one field for a success oracle.
Schema *bqtypes.TableSchema `json:"schema,omitempty"`
// Rows holds the captured wire rows; cells are positional against Schema.
Rows []bqtypes.Row `json:"rows,omitempty"`
JobReference *bqtypes.JobReference `json:"jobReference,omitempty"`
// Error must carry a code or a message for a failure oracle.
Error *OracleError `json:"error,omitempty"`
}
// LoadOracle reads and validates the oracle JSON referenced by ref.
//
// A ref without any path separator is treated as a basename and resolved
// under oracleDir; a ref that contains a separator is used as given. Both "/"
// and the OS-specific separator count, so a ref spelled with forward slashes
// is recognised as a path on every platform.
//
// Read, decode, and validation failures are each wrapped with the resolved
// path.
func LoadOracle(oracleDir, ref string) (*Oracle, error) {
	path := ref
	hasSeparator := strings.ContainsRune(ref, '/') || strings.ContainsRune(ref, os.PathSeparator)
	if !hasSeparator {
		path = filepath.Join(oracleDir, ref)
	}
	data, err := os.ReadFile(path) //nolint:gosec // oracle dir is CLI-controlled
	if err != nil {
		return nil, fmt.Errorf("read oracle %s: %w", path, err)
	}
	var o Oracle
	if err := json.Unmarshal(data, &o); err != nil {
		return nil, fmt.Errorf("parse oracle %s: %w", path, err)
	}
	if err := o.validate(); err != nil {
		return nil, fmt.Errorf("validate oracle %s: %w", path, err)
	}
	return &o, nil
}
// validate checks the invariants of a loaded oracle: captured_at and project
// are mandatory; a success oracle needs at least one schema field; a failure
// oracle needs an error block carrying a code or a message.
func (o *Oracle) validate() error {
	switch {
	case o.CapturedAt == "":
		return errors.New("captured_at is required")
	case o.Project == "":
		return errors.New("project is required")
	case o.Success:
		if o.Schema == nil || len(o.Schema.Fields) == 0 {
			return errors.New("success oracle requires schema.fields")
		}
		return nil
	case o.Error == nil:
		return errors.New("error oracle requires error block")
	case o.Error.Code == 0 && o.Error.Message == "":
		return errors.New("error oracle requires code or message")
	default:
		return nil
	}
}
// ExpectationFromOracle converts wire rows into the runner Expectation used by
// CompareRows / CompareError.
//
// The match mode is the oracle's own when set, otherwise caseMatch, otherwise
// runner.MatchOrdered. A failure oracle yields an error expectation built from
// its recorded code and message; if the oracle carries no error block (which
// validate rejects, but a hand-built Oracle may still have) the expectation is
// an empty error rather than a nil-pointer panic. A success oracle yields one
// map per row, keyed by schema column name with a positional fallback.
func ExpectationFromOracle(o *Oracle, caseMatch runner.MatchMode) runner.Expectation {
	match := o.Match
	if match == "" {
		match = caseMatch
	}
	if match == "" {
		match = runner.MatchOrdered
	}
	if !o.Success {
		exp := runner.ExpectedError{}
		if o.Error != nil {
			exp.MessageContains = o.Error.Message
			if o.Error.Code != 0 {
				exp.Code = o.Error.Code
			}
		}
		return runner.Expectation{Match: match, Error: &exp}
	}
	cols := schemaColumns(o.Schema)
	rows := make([]map[string]any, 0, len(o.Rows))
	for _, r := range o.Rows {
		row := make(map[string]any, len(r.F))
		for i, cell := range r.F {
			// Cells beyond the schema width get a synthetic "colN" name.
			name := positionalName(cols, i)
			row[name] = oracleCellValue(cell.V)
		}
		rows = append(rows, row)
	}
	return runner.Expectation{Match: match, Rows: rows}
}
// WriteOracle writes o as indented JSON (with a trailing newline) to path by
// staging the bytes in a temporary file in the same directory and renaming it
// over the target.
//
// When o.CapturedAt is empty it is set to the current UTC time in RFC 3339
// form before encoding; this mutates o. The temporary file is removed on
// every failure path, including a failed rename, so no ".oracle-*.tmp"
// residue is left next to the target.
func WriteOracle(path string, o *Oracle) error {
	if o.CapturedAt == "" {
		o.CapturedAt = time.Now().UTC().Format(time.RFC3339)
	}
	data, err := json.MarshalIndent(o, "", " ")
	if err != nil {
		return err
	}
	data = append(data, '\n')
	// Stage next to the target so the final rename stays within one directory.
	dir := filepath.Dir(path)
	tmp, err := os.CreateTemp(dir, ".oracle-*.tmp")
	if err != nil {
		return err
	}
	tmpName := tmp.Name()
	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close()
		_ = os.Remove(tmpName)
		return err
	}
	if err := tmp.Close(); err != nil {
		_ = os.Remove(tmpName)
		return err
	}
	if err := os.Rename(tmpName, path); err != nil {
		// Best-effort cleanup; the rename error is the one worth reporting.
		_ = os.Remove(tmpName)
		return err
	}
	return nil
}
// oracleCellValue maps one wire cell value to its expectation value. nil and
// string cells are spelled out explicitly; every kind is currently returned
// unchanged.
func oracleCellValue(v any) any {
	switch cell := v.(type) {
	case nil:
		return nil
	case string:
		return cell
	default:
		return cell
	}
}
// schemaColumns returns the field names of schema in declaration order, or nil
// when schema itself is nil.
func schemaColumns(schema *bqtypes.TableSchema) []string {
	if schema == nil {
		return nil
	}
	names := make([]string, 0, len(schema.Fields))
	for idx := range schema.Fields {
		names = append(names, schema.Fields[idx].Name)
	}
	return names
}
// positionalName returns the column name for cell index i, falling back to the
// synthetic name "col<i>" when i lies beyond the known columns.
func positionalName(cols []string, i int) string {
	if i >= len(cols) {
		return fmt.Sprintf("col%d", i)
	}
	return cols[i]
}
// OracleFromQueryResponse decodes body as a gateway QueryResponse and wraps its
// schema, rows, and job reference in a success oracle tagged with project,
// source, and match. JobID is filled from the job reference when present.
// CapturedAt is left empty.
func OracleFromQueryResponse(project string, source string, match runner.MatchMode, body []byte) (*Oracle, error) {
	resp := bqtypes.QueryResponse{}
	if decodeErr := json.Unmarshal(body, &resp); decodeErr != nil {
		return nil, fmt.Errorf("decode QueryResponse: %w", decodeErr)
	}

	oracle := new(Oracle)
	oracle.Project = project
	oracle.OracleSource = source
	oracle.Match = match
	oracle.Success = true
	oracle.Schema = resp.Schema
	oracle.Rows = resp.Rows
	oracle.JobReference = resp.JobReference
	if ref := resp.JobReference; ref != nil {
		oracle.JobID = ref.JobID
	}
	return oracle, nil
}
package differential
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// JSONSchemaVersion is the value Run stamps into Report.SchemaVersion.
const JSONSchemaVersion = 1
// Options configures a differential replay invocation. Empty fields are
// replaced with defaults by normalizeOptions.
type Options struct {
// CorpusDir is the corpus file or directory; defaults to DefaultCorpusDir.
CorpusDir string
// OracleDir is where basename oracle refs are resolved; defaults to DefaultOracleDir.
OracleDir string
// IncludeSelfTest also loads "_"-prefixed corpus files and directories.
IncludeSelfTest bool
// Harness is passed through to runner.StartEmulator.
Harness runner.HarnessOptions
// Profile names the runner profile; defaults to runner.ProfileDuckDB.
Profile string
// Output selects the report format: "json", or text for any other value
// (the default is "text").
Output string
// Out receives the rendered report; defaults to os.Stdout.
Out io.Writer
// Err defaults to os.Stderr.
Err io.Writer
}
// Result is one corpus case outcome.
type Result struct {
// Case and Path identify the corpus case and the file it was loaded from.
Case string `json:"case"`
Path string `json:"path"`
// Profile is the name of the runner profile the case ran under.
Profile string `json:"profile"`
// Status starts as fail and is upgraded to pass on a match, or rewritten to
// skip when a known_failing case diverges.
Status runner.Status `json:"status"`
// Divergence classifies how the emulator differed from the oracle.
Divergence DivergenceKind `json:"divergence,omitempty"`
KnownFailing bool `json:"known_failing,omitempty"`
// OracleSource comes from the corpus case, falling back to the oracle file.
OracleSource string `json:"oracle_source,omitempty"`
// DurationMs is the wall-clock time spent on the case, in milliseconds.
DurationMs int64 `json:"duration_ms"`
// Message is a one-line explanation; Diff carries the detailed mismatch.
Message string `json:"message,omitempty"`
Diff string `json:"diff,omitempty"`
}
// Report aggregates a differential lane run.
type Report struct {
// SchemaVersion is set to JSONSchemaVersion by Run.
SchemaVersion int `json:"schema_version"`
// Summary counts total, passed, failed, and skipped cases.
Summary runner.Summary `json:"summary"`
// Results holds one entry per corpus case, in execution order.
Results []Result `json:"results"`
}
// ExitCode mirrors the fixture runner semantics: 2 when there is no report,
// 1 when at least one case failed, and 0 otherwise.
func (r *Report) ExitCode() int {
	switch {
	case r == nil:
		return 2
	case r.Summary.Failed > 0:
		return 1
	default:
		return 0
	}
}
// Run replays every corpus case against the emulator, diffs each against its
// committed oracle, tallies the outcomes, and renders the report to opts.Out.
// A corpus-load or unknown-profile error returns a nil report; a rendering
// error is returned together with the completed report.
func Run(ctx context.Context, opts Options) (*Report, error) {
	normalizeOptions(&opts)

	cases, loadErr := LoadCorpusDir(opts.CorpusDir, opts.IncludeSelfTest)
	if loadErr != nil {
		return nil, loadErr
	}
	profile, found := runner.LookupProfile(opts.Profile)
	if !found {
		return nil, fmt.Errorf("unknown profile %q", opts.Profile)
	}

	report := &Report{SchemaVersion: JSONSchemaVersion}
	for _, corpusCase := range cases {
		outcome := runCase(ctx, opts, profile, corpusCase)
		report.Results = append(report.Results, outcome)
		report.Summary.Total++
		if outcome.Status == runner.StatusPass {
			report.Summary.Passed++
		} else if outcome.Status == runner.StatusFail {
			report.Summary.Failed++
		} else if outcome.Status == runner.StatusSkip {
			report.Summary.Skipped++
		}
	}
	return report, renderReport(opts, report)
}
// normalizeOptions fills every unset option with its default: the committed
// corpus and oracle directories, the DuckDB profile, text output, and the
// process's stdout/stderr.
func normalizeOptions(opts *Options) {
	orDefault := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}
	orDefault(&opts.CorpusDir, DefaultCorpusDir)
	orDefault(&opts.OracleDir, DefaultOracleDir)
	orDefault(&opts.Profile, runner.ProfileDuckDB)
	orDefault(&opts.Output, "text")

	if opts.Out == nil {
		opts.Out = os.Stdout
	}
	if opts.Err == nil {
		opts.Err = os.Stderr
	}
}
// runCase executes one corpus case end to end: load its oracle, start a fresh
// emulator, run the setup steps, issue the query, and compare the response
// with the oracle. Harness-level problems (oracle load, emulator start, body
// marshalling) are reported as crashes and never excused by known_failing;
// setup and query RPC failures are classified and may be downgraded to skip.
func runCase(ctx context.Context, opts Options, profile runner.Profile, c *CorpusCase) Result {
	begin := time.Now()
	out := Result{
		Case:         c.Name,
		Path:         c.Path,
		Profile:      profile.Name,
		Status:       runner.StatusFail,
		KnownFailing: c.KnownFailing,
		OracleSource: c.OracleSource,
	}
	// crash finishes the case as a hard failure of the harness itself.
	crash := func(message string) Result {
		out.Message = message
		out.Divergence = KindCrash
		return finish(out, begin)
	}
	// diverge finishes the case with a classified divergence that a
	// known_failing marker is allowed to turn into a skip.
	diverge := func(message string) Result {
		out.Message = message
		out.Divergence = ClassifyDivergence(ClassifyInput{RunnerMessage: out.Message})
		return finishMaybeKnown(out, begin, c.KnownFailing)
	}

	oracle, oracleErr := LoadOracle(opts.OracleDir, c.OracleRef)
	if oracleErr != nil {
		return crash("load oracle: " + oracleErr.Error())
	}
	if out.OracleSource == "" {
		out.OracleSource = oracle.OracleSource
	}

	env, startErr := runner.StartEmulator(ctx, opts.Harness, profile)
	if startErr != nil {
		return crash("start emulator: " + startErr.Error())
	}
	defer func() { _ = env.Close() }()

	base := env.BaseURL + "/bigquery/v2/projects/" + c.ProjectID
	if setupErr := runner.RunSetupSteps(ctx, base, env.DataDir(), c.Setup, c.DefaultDataset); setupErr != nil {
		return diverge(setupErr.Error())
	}

	wireParams := toWireParams(c.QueryParameters)
	queryBody, marshalErr := runner.MarshalJobsQueryBody(c.Query, c.DefaultDataset, wireParams)
	if marshalErr != nil {
		return crash(marshalErr.Error())
	}

	status, body, queryErr := runner.DoRequest(ctx, base+"/queries", queryBody)
	if queryErr != nil {
		return diverge("query rpc: " + queryErr.Error())
	}
	return compareAgainstOracle(out, oracle, c, status, body, begin)
}
// compareAgainstOracle judges the emulator's HTTP response against the oracle.
// Error oracles are delegated to compareErrorOracle. For success oracles a
// non-2xx status or a row mismatch is a classified divergence (subject to
// known_failing); an undecodable body is a crash; otherwise the case passes.
func compareAgainstOracle(
	res Result,
	oracle *Oracle,
	c *CorpusCase,
	status int,
	body []byte,
	started time.Time,
) Result {
	exp := ExpectationFromOracle(oracle, c.Match)
	emulatorOK := status >= 200 && status < 300
	if exp.Error != nil {
		return compareErrorOracle(res, exp, c, status, body, emulatorOK, started)
	}

	// diverge classifies using whatever Message/Diff have been set on res.
	diverge := func(emulatorSucceeded bool) Result {
		res.Divergence = ClassifyDivergence(ClassifyInput{
			OracleSuccess: true, EmulatorSuccess: emulatorSucceeded,
			EmulatorStatus: status, EmulatorBody: body,
			Diff: res.Diff, RunnerMessage: res.Message,
		})
		return finishMaybeKnown(res, started, c.KnownFailing)
	}

	if !emulatorOK {
		res.Message = fmt.Sprintf("query failed with HTTP %d", status)
		res.Diff = "body: " + snippet(body)
		return diverge(false)
	}

	var resp bqtypes.QueryResponse
	if decodeErr := json.Unmarshal(body, &resp); decodeErr != nil {
		res.Message = "decode QueryResponse: " + decodeErr.Error()
		res.Divergence = KindCrash
		return finish(res, started)
	}

	if rowDiff := runner.CompareRows(exp, resp.Schema, resp.Rows); rowDiff != "" {
		res.Message = "row mismatch"
		res.Diff = rowDiff
		return diverge(true)
	}

	res.Status = runner.StatusPass
	res.Divergence = KindMatch
	return finish(res, started)
}
// compareErrorOracle handles cases whose oracle recorded an error. The case
// passes only when the emulator also failed and runner.CompareError reports no
// difference; an unexpected success or a mismatching error is a classified
// divergence that known_failing may downgrade to skip.
func compareErrorOracle(
	res Result,
	exp runner.Expectation,
	c *CorpusCase,
	status int,
	body []byte,
	emulatorSuccess bool,
	started time.Time,
) Result {
	if !emulatorSuccess {
		errDiff := runner.CompareError(*exp.Error, status, body)
		if errDiff == "" {
			res.Status = runner.StatusPass
			res.Divergence = KindMatch
			return finish(res, started)
		}
		res.Message = "error mismatch"
		res.Diff = errDiff
	} else {
		res.Message = "expected error, got success"
		res.Diff = fmt.Sprintf("status: %d\nbody: %s", status, snippet(body))
	}

	res.Divergence = ClassifyDivergence(ClassifyInput{
		OracleSuccess: false, EmulatorSuccess: emulatorSuccess,
		EmulatorStatus: status, EmulatorBody: body,
		Diff: res.Diff, RunnerMessage: res.Message,
	})
	return finishMaybeKnown(res, started, c.KnownFailing)
}
// finishMaybeKnown stamps the duration and, for a failing result of a
// known_failing case, rewrites the status to skip and prefixes the message.
func finishMaybeKnown(res Result, started time.Time, knownFailing bool) Result {
	done := finish(res, started)
	if !knownFailing || done.Status != runner.StatusFail {
		return done
	}
	done.Status = runner.StatusSkip
	done.Message = "known_failing (expected divergence): " + done.Message
	return done
}
// finish records the elapsed time since started, in milliseconds, on res.
func finish(res Result, started time.Time) Result {
	elapsed := time.Since(started)
	res.DurationMs = elapsed.Milliseconds()
	return res
}
// toWireParams converts corpus query parameters into their bqtypes wire form.
// Type names are upper-cased. A non-blank array element type adds an array
// type. Struct fields become struct types. Array or struct values replace the
// scalar value, which is then blanked. An empty input yields nil.
func toWireParams(params []QueryParameterYAML) []bqtypes.QueryParameter {
	if len(params) == 0 {
		return nil
	}
	wire := make([]bqtypes.QueryParameter, len(params))
	for idx := range params {
		src := &params[idx]

		typ := &bqtypes.QueryParameterType{Type: strings.ToUpper(src.Type)}
		if elem := strings.TrimSpace(src.ArrayElementType); elem != "" {
			typ.ArrayType = &bqtypes.QueryParameterType{Type: strings.ToUpper(elem)}
		}
		for _, field := range src.StructFields {
			typ.StructTypes = append(typ.StructTypes, bqtypes.QueryParameterStructType{
				Name: field.Name,
				Type: bqtypes.QueryParameterType{Type: strings.ToUpper(field.Type)},
			})
		}

		val := &bqtypes.QueryParameterValue{Value: src.Value}
		if count := len(src.ArrayValues); count > 0 {
			val.ArrayValues = make([]bqtypes.QueryParameterValue, count)
			for j, item := range src.ArrayValues {
				val.ArrayValues[j] = bqtypes.QueryParameterValue{Value: item}
			}
			val.Value = ""
		}
		if count := len(src.StructValues); count > 0 {
			val.StructValues = make(map[string]bqtypes.QueryParameterValue, count)
			for field, item := range src.StructValues {
				val.StructValues[field] = bqtypes.QueryParameterValue{Value: item}
			}
			val.Value = ""
		}

		wire[idx] = bqtypes.QueryParameter{
			Name:           src.Name,
			ParameterType:  typ,
			ParameterValue: val,
		}
	}
	return wire
}
// snippet returns b as trimmed text, shortened to at most 240 bytes followed by
// "..." when it is longer. The cut never lands inside a multi-byte UTF-8
// sequence: if byte 240 is a continuation byte the cut moves back to the start
// of that rune, so valid input always yields valid output.
func snippet(b []byte) string {
	const limit = 240
	s := strings.TrimSpace(string(b))
	if len(s) <= limit {
		return s
	}
	cut := limit
	// A UTF-8 rune has at most three continuation bytes (10xxxxxx), so at most
	// three steps back reach a rune boundary; the bound also keeps the cut
	// near the limit for non-UTF-8 bodies.
	for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
		cut--
	}
	return s[:cut] + "..."
}
// renderReport writes report to opts.Out, as indented JSON when opts.Output is
// "json" and as plain text for any other value.
func renderReport(opts Options, report *Report) error {
	if opts.Output != "json" {
		return renderText(opts.Out, report)
	}
	encoder := json.NewEncoder(opts.Out)
	encoder.SetIndent("", " ")
	return encoder.Encode(report)
}
// renderText prints a one-line summary followed by one line per result (status,
// case, divergence and, when present, the message) and the diff on its own
// line. Write errors are deliberately ignored; the function always returns nil.
func renderText(w io.Writer, report *Report) error {
	summary := report.Summary
	_, _ = fmt.Fprintf(w, "differential summary: %d total, %d passed, %d failed, %d skipped\n",
		summary.Total, summary.Passed, summary.Failed, summary.Skipped)

	for idx := range report.Results {
		entry := &report.Results[idx]
		text := fmt.Sprintf("%s %s %s", entry.Status, entry.Case, entry.Divergence)
		if entry.Message != "" {
			text = text + " — " + entry.Message
		}
		_, _ = fmt.Fprintln(w, text)
		if entry.Diff == "" {
			continue
		}
		_, _ = fmt.Fprintln(w, entry.Diff)
	}
	return nil
}
package googlesqlcorpus
import (
"errors"
"fmt"
"strconv"
"strings"
)
// Spellings of the non-finite float tokens recognised by the expected-result
// parser. parseValue returns these strings verbatim when it sees them.
const (
corpusValNaN = "nan"
corpusValInf = "inf"
corpusValNInf = "-inf"
)
// ExpectedResult is the parsed ARRAY<...>[...] expectation from a
// .test case.
type ExpectedResult struct {
// Ordered is true when the array body starts with "known order:".
Ordered bool
// Rows holds one slice of positional cells per {...} row. A cell is nil for
// NULL, a bool for true/false, a nested []any for a nested {...}, and a
// string for everything else (numbers, quoted text, tagged literals).
Rows [][]any
}
// ParseExpected parses the GoogleSQL compliance result stanza, i.e. a single
// ARRAY<...>[...] literal. Blank input and non-whitespace text after the
// closing bracket are both errors.
func ParseExpected(raw string) (ExpectedResult, error) {
	trimmed := strings.TrimSpace(raw)
	if trimmed == "" {
		return ExpectedResult{}, errors.New("empty expected block")
	}

	parser := expectedParser{s: trimmed}
	rows, ordered, parseErr := parser.parseTopArray()
	if parseErr != nil {
		return ExpectedResult{}, parseErr
	}

	if parser.pos < len(parser.s) {
		if leftover := strings.TrimSpace(parser.s[parser.pos:]); leftover != "" {
			return ExpectedResult{}, fmt.Errorf("trailing input at %d", parser.pos)
		}
	}
	return ExpectedResult{Ordered: ordered, Rows: rows}, nil
}
// expectedParser is a hand-written cursor over an expected-result stanza.
type expectedParser struct {
// s is the full input being parsed.
s string
// pos is the byte offset of the next unread character in s.
pos int
}
// parseTopArray parses the outer "ARRAY<type>[ rows ]" wrapper. The element
// type is skipped rather than interpreted. It returns the rows and whether the
// body was marked with "known order:".
func (p *expectedParser) parseTopArray() ([][]any, bool, error) {
	if p.skipSpace(); !p.consume("ARRAY<") {
		return nil, false, fmt.Errorf("expected ARRAY< at %d", p.pos)
	}
	if typeErr := p.skipTypeExpr(); typeErr != nil {
		return nil, false, typeErr
	}
	if !p.consume(">") {
		return nil, false, fmt.Errorf("expected > at %d", p.pos)
	}
	if p.skipSpace(); !p.consume("[") {
		return nil, false, fmt.Errorf("expected [ at %d", p.pos)
	}

	knownOrder := p.consume("known order:")
	rows, rowsErr := p.parseRows()
	if rowsErr != nil {
		return nil, false, rowsErr
	}
	if !p.consume("]") {
		return nil, false, fmt.Errorf("expected ] at %d", p.pos)
	}
	return rows, knownOrder, nil
}
// skipTypeExpr advances past a type expression whose opening "<" has already
// been consumed, tracking nested angle brackets. It stops on the matching ">"
// without consuming it, and fails if the input ends first.
func (p *expectedParser) skipTypeExpr() error {
	nesting := 1
	for nesting > 0 && p.pos < len(p.s) {
		ch := p.s[p.pos]
		if ch == '>' {
			nesting--
			if nesting == 0 {
				// Leave the closing '>' for the caller to consume.
				break
			}
		} else if ch == '<' {
			nesting++
		}
		p.pos++
	}
	if nesting != 0 {
		return fmt.Errorf("unterminated type expr at %d", p.pos)
	}
	return nil
}
// parseRows parses zero or more comma-separated {...} rows up to, but not
// including, the closing "]". An empty array yields nil rows.
func (p *expectedParser) parseRows() ([][]any, error) {
	p.skipSpace()
	if p.peek() == ']' {
		return nil, nil
	}
	var parsed [][]any
	for {
		p.skipSpace()
		row, rowErr := p.parseRow()
		if rowErr != nil {
			return nil, rowErr
		}
		parsed = append(parsed, row)

		p.skipSpace()
		// Stop at the closing bracket or at end of input; the caller reports
		// a missing "]".
		if next := p.peek(); next == ']' || next == 0 {
			return parsed, nil
		}
		if !p.consume(",") {
			return nil, fmt.Errorf("expected , between rows at %d", p.pos)
		}
	}
}
// parseRow parses one "{cell, cell, ...}" group and returns its cells in
// order. An empty "{}" yields nil cells.
func (p *expectedParser) parseRow() ([]any, error) {
	if !p.consume("{") {
		return nil, fmt.Errorf("expected { at %d", p.pos)
	}
	var cells []any
	for first := true; ; first = false {
		p.skipSpace()
		if p.consume("}") {
			return cells, nil
		}
		// Every cell after the first must be preceded by a comma.
		if !first {
			if !p.consume(",") {
				return nil, fmt.Errorf("expected , between cells at %d", p.pos)
			}
			p.skipSpace()
		}
		cell, cellErr := p.parseValue()
		if cellErr != nil {
			return nil, cellErr
		}
		cells = append(cells, cell)
	}
}
// parseValue parses a single cell. In order of precedence it recognises a
// nested {...} group, the keywords NULL/true/false, the non-finite float
// tokens, quoted strings and bytes, typed date/time literals, and finally a
// bare numeric token. Anything else is an error.
func (p *expectedParser) parseValue() (any, error) {
	p.skipSpace()
	if p.peek() == '{' {
		group, groupErr := p.parseRow()
		if groupErr != nil {
			return nil, groupErr
		}
		return group, nil
	}

	// "-inf" is listed before "inf"; both are tried before the bare-token
	// branch, which would otherwise swallow the leading "-".
	keywords := []struct {
		token string
		value any
	}{
		{"NULL", nil},
		{"true", true},
		{"false", false},
		{corpusValNaN, corpusValNaN},
		{corpusValNInf, corpusValNInf},
		{corpusValInf, corpusValInf},
	}
	for _, kw := range keywords {
		if p.consume(kw.token) {
			return kw.value, nil
		}
	}

	if quoted, matched, err := p.tryParseQuotedValue(); matched || err != nil {
		return quoted, err
	}
	if typed, matched, err := p.tryParseTypedLiteral(); matched || err != nil {
		return typed, err
	}
	if next := p.peek(); isDigit(next) || next == '-' {
		return p.readBareToken(), nil
	}
	return nil, fmt.Errorf("unexpected value at %d", p.pos)
}
// tryParseQuotedValue parses a bytes literal (b"..." or b'...', returned with
// a "b:" prefix) or a string quoted with ", ' or `. The boolean reports
// whether a quoted form was recognised at all.
func (p *expectedParser) tryParseQuotedValue() (any, bool, error) {
	if p.consume("b\"") || p.consume("b'") {
		// The byte just consumed is the opening quote character.
		text, err := p.readQuoted(p.s[p.pos-1])
		if err != nil {
			return nil, true, err
		}
		return "b:" + text, true, nil
	}

	switch opener := p.peek(); opener {
	case '"', '\'', '`':
		p.pos++
		text, err := p.readQuoted(opener)
		if err != nil {
			return nil, true, err
		}
		return text, true, nil
	}
	return nil, false, nil
}
// tryParseTypedLiteral parses "DATE x", "TIMESTAMP x", "TIME x" or
// "DATETIME x" and returns the literal text prefixed with the matching
// "KIND:" tag. The boolean reports whether one of the keywords was recognised.
func (p *expectedParser) tryParseTypedLiteral() (any, bool, error) {
	// Each pair is {keyword as it appears in the input, tag on the result}.
	kinds := [][2]string{
		{"DATE ", "DATE:"},
		{"TIMESTAMP ", "TIMESTAMP:"},
		{"TIME ", "TIME:"},
		{"DATETIME ", "DATETIME:"},
	}
	for _, kind := range kinds {
		keyword, tag := kind[0], kind[1]
		if p.consume(keyword) {
			p.skipSpace()
			literal, err := p.readDateOrTimestampLiteral()
			if err != nil {
				return nil, true, err
			}
			return tag + literal, true, nil
		}
	}
	return nil, false, nil
}
// readQuoted reads up to the closing quote (the opening one has already been
// consumed) and returns the text in between. A backslash makes the following
// byte literal; the backslash itself is dropped. Reaching end of input first
// is an error.
func (p *expectedParser) readQuoted(quote byte) (string, error) {
	var text strings.Builder
	for p.pos < len(p.s) {
		ch := p.s[p.pos]
		p.pos++
		switch {
		case ch == '\\' && p.pos < len(p.s):
			text.WriteByte(p.s[p.pos])
			p.pos++
		case ch == quote:
			return text.String(), nil
		default:
			text.WriteByte(ch)
		}
	}
	return "", fmt.Errorf("unterminated string at %d", p.pos)
}
// readDateOrTimestampLiteral reads the payload of a typed literal: a quoted
// string when it starts with " or ', otherwise a bare token.
func (p *expectedParser) readDateOrTimestampLiteral() (string, error) {
	p.skipSpace()
	switch opener := p.peek(); opener {
	case '"', '\'':
		p.pos++
		return p.readQuoted(opener)
	default:
		return p.readBareToken(), nil
	}
}
// readBareToken consumes characters up to the next comma, closing brace or
// bracket, space, newline, or tab (or end of input) and returns them trimmed.
func (p *expectedParser) readBareToken() string {
	remaining := p.s[p.pos:]
	stop := strings.IndexAny(remaining, ",}] \n\t")
	if stop < 0 {
		stop = len(remaining)
	}
	p.pos += stop
	return strings.TrimSpace(remaining[:stop])
}
// skipSpace advances the cursor past spaces, tabs, and line breaks.
func (p *expectedParser) skipSpace() {
	for ; p.pos < len(p.s); p.pos++ {
		if ch := p.s[p.pos]; ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r' {
			return
		}
	}
}
// peek returns the byte under the cursor without consuming it, or 0 at end of
// input.
func (p *expectedParser) peek() byte {
	if p.pos < len(p.s) {
		return p.s[p.pos]
	}
	return 0
}
// consume skips leading whitespace and, if the input continues with tok,
// advances past it and reports true. On a mismatch only the whitespace has
// been consumed.
func (p *expectedParser) consume(tok string) bool {
	p.skipSpace()
	matched := strings.HasPrefix(p.s[p.pos:], tok)
	if matched {
		p.pos += len(tok)
	}
	return matched
}
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
	// Byte subtraction wraps around for c < '0', so one comparison covers
	// both bounds.
	return c-'0' <= 9
}
// ToRunnerRows maps positional expected cells onto the gateway schema's
// column names for typed comparison in the fixture diff engine. Cells beyond
// the known columns are dropped, and columns without a cell are left unset.
func ToRunnerRows(cells [][]any, colNames []string) []map[string]any {
	named := make([]map[string]any, len(cells))
	for r, row := range cells {
		byName := make(map[string]any, len(colNames))
		for i := 0; i < len(colNames) && i < len(row); i++ {
			byName[colNames[i]] = normalizeExpectedCell(row[i])
		}
		named[r] = byName
	}
	return named
}
// normalizeExpectedCell converts a parsed cell into the value handed to the
// diff engine. Tagged strings ("b:", "DATE:", "TIMESTAMP:", "TIME:",
// "DATETIME:") lose their tag; "nan" becomes "NaN"; "inf" and "-inf" stay as
// they are; any other string that parses as a float becomes a float64; nested
// rows are flattened to their fmt.Sprint rendering; everything else passes
// through unchanged.
func normalizeExpectedCell(v any) any {
	if nested, isRow := v.([]any); isRow {
		// Nested struct rendered as JSON-ish for STRUCT columns until
		// the lane grows struct-aware corpus cases.
		return fmt.Sprint(nested)
	}
	text, isString := v.(string)
	if !isString {
		return v
	}

	for _, tag := range []string{"b:", "DATE:", "TIMESTAMP:", "TIME:", "DATETIME:"} {
		if strings.HasPrefix(text, tag) {
			return text[len(tag):]
		}
	}

	switch text {
	case corpusValNaN:
		return "NaN"
	case corpusValInf, corpusValNInf:
		return text
	}

	if number, err := strconv.ParseFloat(text, 64); err == nil {
		return number
	}
	return text
}
package googlesqlcorpus
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"slices"
)
// Manifest pins the passing subset and triage buckets for the lane. It is
// stored on disk as JSON (see LoadManifest / SaveManifest).
type Manifest struct {
// Pinned lists case IDs in "file::name" form that must PASS. IDs are
// produced by CaseID.
Pinned []string `json:"pinned"`
// Triage records first-run bucket assignments keyed by case ID.
Triage map[string]TriageEntry `json:"triage,omitempty"`
// UnsupportedFeatures skips any case declaring one of these
// LanguageFeature tokens (without the FEATURE_ prefix).
UnsupportedFeatures []string `json:"unsupported_features,omitempty"`
}
// TriageEntry is one case's triage classification.
type TriageEntry struct {
// Bucket is the triage bucket label.
// NOTE(review): presumably one of the Bucket* constants — confirm.
Bucket string `json:"bucket"`
// Message is an optional free-form explanation.
Message string `json:"message,omitempty"`
}
// CaseID returns the stable identifier for a test case: the base name of its
// file, "::", and the case name. Unnamed cases use "line_<Line>" in place of
// the name.
func CaseID(tc TestCase) string {
	file := filepath.Base(tc.File)
	if tc.Name == "" {
		return fmt.Sprintf("%s::%s", file, fmt.Sprintf("line_%d", tc.Line))
	}
	return fmt.Sprintf("%s::%s", file, tc.Name)
}
// LoadManifest reads the manifest JSON at path. A read error is returned
// as-is; a decode error is wrapped.
func LoadManifest(path string) (*Manifest, error) {
	raw, readErr := os.ReadFile(path) //nolint:gosec // manifest path is CLI-controlled
	if readErr != nil {
		return nil, readErr
	}
	manifest := new(Manifest)
	if decodeErr := json.Unmarshal(raw, manifest); decodeErr != nil {
		return nil, fmt.Errorf("decode manifest: %w", decodeErr)
	}
	return manifest, nil
}
// SaveManifest writes m as indented JSON (with a trailing newline) to path by
// staging the bytes in "<path>.tmp" and renaming that file over the target.
// The staging file is removed if either the write or the rename fails, so a
// failed save leaves no residue next to the manifest.
func SaveManifest(path string, m *Manifest) error {
	b, err := json.MarshalIndent(m, "", " ")
	if err != nil {
		return err
	}
	tmp := path + ".tmp"
	if err := os.WriteFile(tmp, append(b, '\n'), 0o600); err != nil { //nolint:gosec // conformance artifact
		// WriteFile may have created or truncated the file before failing.
		_ = os.Remove(tmp)
		return err
	}
	if err := os.Rename(tmp, path); err != nil {
		// Best-effort cleanup; the rename error is the one worth reporting.
		_ = os.Remove(tmp)
		return err
	}
	return nil
}
// ShouldRun reports whether tc should be executed. It returns false, with a
// reason, when gatePinned is set and the case is not pinned, or when the case
// requires a feature listed in UnsupportedFeatures.
func (m *Manifest) ShouldRun(tc TestCase, gatePinned bool) (bool, string) {
	if gatePinned {
		if !m.isPinned(tc) {
			return false, "not in pinned manifest"
		}
	}
	for i := range tc.RequiredFeatures {
		feature := tc.RequiredFeatures[i]
		if slices.Contains(m.UnsupportedFeatures, feature) {
			return false, "required feature out of scope: " + feature
		}
	}
	return true, ""
}
// isPinned reports whether tc's case ID appears in the Pinned list.
func (m *Manifest) isPinned(tc TestCase) bool {
	return slices.Index(m.Pinned, CaseID(tc)) >= 0
}
package googlesqlcorpus
import (
"fmt"
"strings"
)
// TestFile is one vendored GoogleSQL compliance .test file.
type TestFile struct {
// Path is the path the file was parsed from, as passed to ParseFile.
Path string
// Defaults holds the file-level defaults in effect after the last
// [default ...] directive seen.
Defaults FileDefaults
// Cases lists the parsed cases in file order.
Cases []TestCase
}
// FileDefaults captures file-level directives such as
// [default required_features=...].
type FileDefaults struct {
// RequiredFeatures is prepended to the required features of each case
// parsed after the directive.
RequiredFeatures []string
}
// TestCase is one statement/expected-result pair from a .test file.
type TestCase struct {
// File is the path of the .test file the case came from.
File string
// Name comes from the [name=...] directive and may be empty.
Name string
// RequiredFeatures is the de-duplicated union of the file defaults and the
// case's own required_features, defaults first.
RequiredFeatures []string
// PrepareDatabase is set by the [prepare_database] directive.
PrepareDatabase bool
// SQL is the trimmed statement text that precedes the "--" separator.
SQL string
// Expected is the parsed result stanza; it is the zero value when
// ExpectError is set.
Expected ExpectedResult
// ExpectError is the trimmed text after "ERROR:" for cases that expect an error.
ExpectError string
Line int // 1-based line of the case's first directive
}
// ParseFile splits a byte-identical upstream .test file into cases. Blocks are
// separated by "==" lines. A block's [default ...] directive updates the file
// defaults for that block and every later one, and a block without a body
// contributes no case.
func ParseFile(path string, content string) (*TestFile, error) {
	parsed := &TestFile{Path: path}
	for _, raw := range splitTestBlocks(content) {
		block := strings.TrimSpace(raw)
		if block == "" {
			continue
		}
		meta, body, splitErr := splitMetaAndBody(block)
		if splitErr != nil {
			return nil, fmt.Errorf("%s: %w", path, splitErr)
		}
		// parsed.Defaults doubles as the running file-level defaults.
		if len(meta.Defaults.RequiredFeatures) > 0 {
			parsed.Defaults = mergeDefaults(parsed.Defaults, meta.Defaults)
		}
		if strings.TrimSpace(body) != "" {
			testCase, caseErr := parseCase(path, meta, body, parsed.Defaults)
			if caseErr != nil {
				return nil, caseErr
			}
			parsed.Cases = append(parsed.Cases, testCase)
		}
	}
	return parsed, nil
}
// splitTestBlocks cuts content into blocks at lines consisting solely of "=="
// (ignoring surrounding whitespace). Leading empty lines of a block are
// dropped. Blocks closed by a separator are returned untrimmed; the final
// unterminated block is trimmed and kept only when non-blank.
func splitTestBlocks(content string) []string {
	var (
		blocks  []string
		pending []string
	)
	for _, line := range strings.Split(content, "\n") {
		if strings.TrimSpace(line) == "==" {
			blocks = append(blocks, strings.Join(pending, "\n"))
			pending = pending[:0]
			continue
		}
		// An empty line before any content adds nothing to the block.
		if len(pending) == 0 && line == "" {
			continue
		}
		pending = append(pending, line)
	}
	if tail := strings.TrimSpace(strings.Join(pending, "\n")); tail != "" {
		blocks = append(blocks, tail)
	}
	return blocks
}
// blockMeta collects the directives found at the top of one test block.
type blockMeta struct {
// Name is the value of the [name=...] directive.
Name string
// RequiredFeatures is the list from the case-level [required_features=...] directive.
RequiredFeatures []string
// PrepareDatabase is set when [prepare_database] is present.
PrepareDatabase bool
// Defaults carries any [default ...] directives found in the block.
Defaults FileDefaults
// Line is a 1-based line number within the block; it starts at 1.
Line int
}
// splitMetaAndBody separates the leading "[...]" directive lines of a block
// from its body (SQL, separator, and expected output).
//
// Directive lines and blank lines are consumed until the first other line;
// from there on every line, including bracketed ones, belongs to the body.
// meta.Line is the 1-based line, within the block, of the first directive, or
// 1 when the block has no directives. A malformed directive is reported with
// its block-relative line number.
func splitMetaAndBody(block string) (blockMeta, string, error) {
	meta := blockMeta{Line: 1}
	var bodyLines []string
	inBody := false
	// sawDirective is tracked explicitly. Line == 1 cannot serve as the
	// "unset" marker because a first directive on line 1 is a legitimate
	// value that later directives must not overwrite.
	sawDirective := false
	for i, line := range strings.Split(block, "\n") {
		trim := strings.TrimSpace(line)
		if !inBody && strings.HasPrefix(trim, "[") && strings.HasSuffix(trim, "]") {
			if err := applyDirective(trim, &meta); err != nil {
				return blockMeta{}, "", fmt.Errorf("line %d: %w", i+1, err)
			}
			if !sawDirective {
				meta.Line = i + 1
				sawDirective = true
			}
			continue
		}
		// Blank lines before the body starts are not part of it.
		if trim == "" && !inBody {
			continue
		}
		inBody = true
		bodyLines = append(bodyLines, line)
	}
	return meta, strings.Join(bodyLines, "\n"), nil
}
// applyDirective interprets one "[...]" line and records it on meta.
// "[default key=value]" sets a file-level default; only required_features is
// supported, and anything else is an error. Case-level name, required_features
// and prepare_database are stored; all other directives are accepted and
// ignored.
func applyDirective(directive string, meta *blockMeta) error {
	inner := strings.TrimSuffix(strings.TrimPrefix(directive, "["), "]")

	if rest, isDefault := strings.CutPrefix(inner, "default "); isDefault {
		key, val, hasValue := strings.Cut(rest, "=")
		if !hasValue {
			return fmt.Errorf("invalid default directive %q", directive)
		}
		if strings.TrimSpace(key) != "required_features" {
			return fmt.Errorf("unsupported default directive %q", key)
		}
		meta.Defaults.RequiredFeatures = splitCSV(val)
		return nil
	}

	// Without "=", Cut yields the whole text as the key and an empty value.
	key, val, _ := strings.Cut(inner, "=")
	switch strings.TrimSpace(key) {
	case "name":
		meta.Name = strings.TrimSpace(val)
	case "required_features":
		meta.RequiredFeatures = splitCSV(val)
	case "prepare_database":
		meta.PrepareDatabase = true
	case "load_proto_files", "load_proto_names", "load_enum_names",
		"parameters", "labels", "forbidden_features":
		// Parsed for triage; runner skips cases that need these today.
	}
	// Unknown directives are ignored so upstream additions do not
	// break the parser; triage buckets them later if needed.
	return nil
}
// mergeDefaults returns cur with its required features replaced by add's when
// add specifies any. An empty add leaves cur untouched.
func mergeDefaults(cur, add FileDefaults) FileDefaults {
	if len(add.RequiredFeatures) == 0 {
		return cur
	}
	cur.RequiredFeatures = add.RequiredFeatures
	return cur
}
// parseCase turns one block body into a TestCase. The body is split at the
// first "--" line (falling back to the first "--" anywhere): the part before
// is the SQL, and the part after is either "ERROR: <text>" or a result stanza
// for ParseExpected. Required features are the file defaults followed by the
// case's own, de-duplicated.
func parseCase(path string, meta blockMeta, body string, defaults FileDefaults) (TestCase, error) {
	sqlPart, expectedPart, found := strings.Cut(body, "\n--\n")
	if !found {
		sqlPart, expectedPart, found = strings.Cut(body, "--")
	}
	if !found {
		return TestCase{}, fmt.Errorf("%s case %q: missing -- separator", path, meta.Name)
	}

	sql := strings.TrimSpace(sqlPart)
	expectedRaw := strings.TrimSpace(expectedPart)
	if sql == "" {
		return TestCase{}, fmt.Errorf("%s case %q: empty SQL", path, meta.Name)
	}

	tc := TestCase{
		File:            path,
		Name:            meta.Name,
		PrepareDatabase: meta.PrepareDatabase,
		SQL:             sql,
		Line:            meta.Line,
	}
	if message, wantsError := strings.CutPrefix(expectedRaw, "ERROR:"); wantsError {
		tc.ExpectError = strings.TrimSpace(message)
	} else {
		expected, parseErr := ParseExpected(expectedRaw)
		if parseErr != nil {
			return TestCase{}, fmt.Errorf("%s case %q: parse expected: %w", path, meta.Name, parseErr)
		}
		tc.Expected = expected
	}

	combined := make([]string, 0, len(defaults.RequiredFeatures)+len(meta.RequiredFeatures))
	combined = append(combined, defaults.RequiredFeatures...)
	combined = append(combined, meta.RequiredFeatures...)
	tc.RequiredFeatures = dedupe(combined)
	return tc, nil
}
// splitCSV splits s on commas, trims each item, and drops the blank ones.
// It returns nil when nothing remains.
func splitCSV(s string) []string {
	isComma := func(r rune) bool { return r == ',' }
	var items []string
	for _, field := range strings.FieldsFunc(s, isComma) {
		if item := strings.TrimSpace(field); item != "" {
			items = append(items, item)
		}
	}
	return items
}
// dedupe returns the distinct values of in, keeping the first occurrence of
// each in its original position. It returns nil for empty input.
func dedupe(in []string) []string {
	var unique []string
	seen := make(map[string]struct{}, len(in))
	for _, item := range in {
		if _, duplicate := seen[item]; duplicate {
			continue
		}
		seen[item] = struct{}{}
		unique = append(unique, item)
	}
	return unique
}
package googlesqlcorpus
import (
"context"
"encoding/json"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/conformance/runner"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// Bucket labels recorded in Result.Bucket.
// NOTE(review): presumably also the values stored in TriageEntry.Bucket — confirm.
const (
// BucketEngineBug is assigned, among other cases, when the query RPC itself fails.
BucketEngineBug = "engine-bug"
BucketNotYetLanded = "not-yet-landed-route"
BucketFeatureOutOfScope = "corpus-feature-out-of-scope"
BucketPinnedPass = "pinned-pass"
)
// Options configures a corpus run. Empty fields are replaced with defaults by
// normalizeOptions.
type Options struct {
// CorpusDir is the root that is walked for .test files; defaults to
// "conformance/googlesql-corpus/corpus".
CorpusDir string
// Manifest defaults to an empty manifest when nil.
Manifest *Manifest
// GatePinned presumably restricts the run to pinned cases (see
// Manifest.ShouldRun) — confirm against the skip logic.
GatePinned bool
// TriageMode makes normalizeOptions allocate Manifest.Triage when it is nil.
TriageMode bool
// Harness is passed through to runner.StartEmulator.
Harness runner.HarnessOptions
// Profile names the runner profile; defaults to runner.ProfileDuckDB.
Profile string
// ProjectID is the project in the gateway base URL; defaults to "googlesql-corpus".
ProjectID string
// DatasetID is the dataset created before the run; defaults to "ds1".
DatasetID string
// Out receives per-case progress lines; defaults to os.Stdout.
Out io.Writer
// Err defaults to os.Stderr.
Err io.Writer
}
// Result is one case outcome.
type Result struct {
ID string `json:"id"`
File string `json:"file"`
Name string `json:"name"`
// Status is compared against the string forms of runner.StatusPass,
// StatusFail and StatusSkip when tallying the summary.
Status string `json:"status"`
// Bucket is the triage bucket assigned to the outcome, if any.
Bucket string `json:"bucket,omitempty"`
// Message is a one-line explanation; Diff carries the detailed mismatch.
Message string `json:"message,omitempty"`
Diff string `json:"diff,omitempty"`
DurationMs int64 `json:"duration_ms"`
}
// Report aggregates a corpus invocation.
type Report struct {
// Summary counts the cases by final status; Total counts every case run.
Summary struct {
Total int `json:"total"`
Passed int `json:"passed"`
Failed int `json:"failed"`
Skipped int `json:"skipped"`
} `json:"summary"`
// Results holds one entry per case, in execution order.
Results []Result `json:"results"`
}
// ExitCode mirrors the fixture runner semantics: 2 when there is no report,
// 1 when at least one case failed, and 0 otherwise.
func (r *Report) ExitCode() int {
	switch {
	case r == nil:
		return 2
	case r.Summary.Failed > 0:
		return 1
	default:
		return 0
	}
}
// LoadCorpusDir parses every file under dir whose path ends in ".test" and
// returns their cases in walk order. The walk stops at the first walk, read,
// or parse error; the cases collected up to that point are returned alongside
// the error.
func LoadCorpusDir(dir string) ([]TestCase, error) {
	var collected []TestCase
	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if entry.IsDir() {
			return nil
		}
		if !strings.HasSuffix(path, ".test") {
			return nil
		}
		content, readErr := os.ReadFile(path) //nolint:gosec // corpus dir is CLI-controlled
		if readErr != nil {
			return readErr
		}
		parsed, parseErr := ParseFile(path, string(content))
		if parseErr != nil {
			return parseErr
		}
		collected = append(collected, parsed.Cases...)
		return nil
	}
	walkErr := filepath.WalkDir(dir, visit)
	return collected, walkErr
}
// normalizeOptions fills every unset option with its default: the vendored
// corpus directory, an empty manifest (with a triage map in triage mode), the
// DuckDB profile, the default project and dataset IDs, and the process's
// stdout/stderr.
func normalizeOptions(opts *Options) {
	orDefault := func(field *string, fallback string) {
		if *field == "" {
			*field = fallback
		}
	}

	orDefault(&opts.CorpusDir, "conformance/googlesql-corpus/corpus")
	if opts.Manifest == nil {
		opts.Manifest = &Manifest{}
	}
	// Triage mode writes into the map, so it must exist up front.
	if opts.TriageMode && opts.Manifest.Triage == nil {
		opts.Manifest.Triage = make(map[string]TriageEntry)
	}
	orDefault(&opts.Profile, runner.ProfileDuckDB)
	orDefault(&opts.ProjectID, "googlesql-corpus")
	orDefault(&opts.DatasetID, "ds1")

	if opts.Out == nil {
		opts.Out = os.Stdout
	}
	if opts.Err == nil {
		opts.Err = os.Stderr
	}
}
// Run executes the corpus against the emulator. It loads every case, starts a
// single emulator for the whole run, seeds the dataset, then runs the cases in
// order while tallying statuses and printing one progress line (plus any diff)
// per case to opts.Out. Setup failures return a nil report.
func Run(ctx context.Context, opts Options) (*Report, error) {
	normalizeOptions(&opts)

	cases, loadErr := LoadCorpusDir(opts.CorpusDir)
	if loadErr != nil {
		return nil, loadErr
	}
	profile, found := runner.LookupProfile(opts.Profile)
	if !found {
		return nil, fmt.Errorf("unknown profile %q", opts.Profile)
	}
	env, startErr := runner.StartEmulator(ctx, opts.Harness, profile)
	if startErr != nil {
		return nil, fmt.Errorf("start emulator: %w", startErr)
	}
	defer func() { _ = env.Close() }()

	base := env.BaseURL + "/bigquery/v2/projects/" + opts.ProjectID
	if seedErr := seedDataset(ctx, base, opts.DatasetID); seedErr != nil {
		return nil, fmt.Errorf("seed dataset: %w", seedErr)
	}

	report := new(Report)
	for _, tc := range cases {
		outcome := runCase(ctx, base, opts, tc)
		report.Results = append(report.Results, outcome)

		tally := &report.Summary
		tally.Total++
		if outcome.Status == string(runner.StatusPass) {
			tally.Passed++
		} else if outcome.Status == string(runner.StatusFail) {
			tally.Failed++
		} else if outcome.Status == string(runner.StatusSkip) {
			tally.Skipped++
		}

		_, _ = fmt.Fprintf(opts.Out, "%s %s %s\n", outcome.Status, outcome.ID, outcome.Message)
		if outcome.Diff == "" {
			continue
		}
		_, _ = fmt.Fprintf(opts.Out, "%s\n", outcome.Diff)
	}
	return report, nil
}
// seedDataset asks the gateway to create dataset under the project
// encoded in base by sending a datasets.insert body to
// base+"/datasets" through runner.DoRequest.
//
// The body is produced with encoding/json so project or dataset IDs
// containing quotes or backslashes cannot yield malformed JSON. HTTP
// 409 is treated as success (presumably "dataset already exists" --
// confirm against the gateway); any other non-2xx status is returned
// as an error that includes the response body.
func seedDataset(ctx context.Context, base, dataset string) error {
	type datasetRef struct {
		ProjectID string `json:"projectId"`
		DatasetID string `json:"datasetId"`
	}
	payload := struct {
		DatasetReference datasetRef `json:"datasetReference"`
		Location         string     `json:"location"`
	}{
		DatasetReference: datasetRef{
			ProjectID: projectIDFromBase(base),
			DatasetID: dataset,
		},
		Location: "US",
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("encode datasets.insert body: %w", err)
	}
	status, respBody, err := runner.DoRequest(ctx, base+"/datasets", body)
	if err != nil {
		return err
	}
	if status == 409 {
		return nil
	}
	if status < 200 || status >= 300 {
		return fmt.Errorf("datasets.insert -> %d: %s", status, string(respBody))
	}
	return nil
}
// runCase executes one corpus case against the gateway rooted at base
// and returns its Result with DurationMs filled in.
//
// Flow: cases rejected by skipCase are returned as skips; otherwise
// the SQL is sent through runner.QueryViaGateway. Transport errors and
// undecodable responses are bucketed as engine bugs, non-2xx responses
// are bucketed by classifyFailure, and successful responses are
// compared against the expected rows. In TriageMode a passing case is
// also recorded in opts.Manifest.Triage (normalizeOptions guarantees
// the map exists when the caller went through Run).
func runCase(ctx context.Context, base string, opts Options, tc TestCase) Result {
	started := time.Now()
	res := baseResult(tc)
	if skip, ok := skipCase(tc, opts); ok {
		return finish(skip, started)
	}
	status, body, err := runner.QueryViaGateway(ctx, base, tc.SQL)
	if err != nil {
		res.Message = "query rpc: " + err.Error()
		res.Bucket = BucketEngineBug
		return finish(res, started)
	}
	if status < 200 || status >= 300 {
		res.Message = fmt.Sprintf("query failed HTTP %d", status)
		res.Diff = string(body)
		// Classify on the response body too: the synthesized message
		// only carries the status code, so the "unimplemented" /
		// "not implemented" detection in classifyFailure could never
		// trigger without the engine's own error text.
		res.Bucket = classifyFailure(tc, res.Message+"\n"+res.Diff)
		return finish(res, started)
	}
	var run bqtypes.QueryResponse
	if err := json.Unmarshal(body, &run); err != nil {
		res.Message = "decode response: " + err.Error()
		res.Bucket = BucketEngineBug
		return finish(res, started)
	}
	// Expected rows are keyed by the column names the gateway returned.
	cols := schemaColumns(run.Schema)
	exp := runner.Expectation{
		Match: chooseMatch(tc.Expected.Ordered),
		Rows:  ToRunnerRows(tc.Expected.Rows, cols),
	}
	if diff := runner.CompareRows(exp, run.Schema, run.Rows); diff != "" {
		res.Message = "row mismatch"
		res.Diff = diff
		res.Bucket = BucketEngineBug
		return finish(res, started)
	}
	res.Status = string(runner.StatusPass)
	res.Bucket = BucketPinnedPass
	if opts.TriageMode {
		opts.Manifest.Triage[res.ID] = TriageEntry{Bucket: BucketPinnedPass}
	}
	return finish(res, started)
}
// baseResult builds the starting Result for tc. The status defaults
// to "fail" so that every early-return path in the runner reports a
// failure unless it explicitly upgrades the status.
func baseResult(tc TestCase) Result {
	var res Result
	res.ID = CaseID(tc)
	res.File = tc.File
	res.Name = tc.Name
	res.Status = string(runner.StatusFail)
	return res
}
// skipCase decides whether tc should be skipped instead of executed.
// When it returns true the accompanying Result is a fully populated
// skip record; when it returns false the Result is the zero value and
// must be ignored.
//
// Checks are applied in order: prepare_database cases, cases that
// expect an error, and finally the manifest gate (ShouldRun).
func skipCase(tc TestCase, opts Options) (Result, bool) {
	skipped := baseResult(tc)
	skipped.Status = string(runner.StatusSkip)

	if tc.PrepareDatabase {
		skipped.Bucket = BucketNotYetLanded
		skipped.Message = "prepare_database seeding not yet implemented"
		return skipped, true
	}
	if tc.ExpectError != "" {
		skipped.Bucket = BucketFeatureOutOfScope
		skipped.Message = "error-expectation cases deferred in starter lane"
		return skipped, true
	}
	if runnable, reason := opts.Manifest.ShouldRun(tc, opts.GatePinned); !runnable {
		skipped.Bucket = BucketFeatureOutOfScope
		skipped.Message = reason
		return skipped, true
	}
	return Result{}, false
}
// chooseMatch translates the corpus "ordered" flag into the runner's
// match mode: ordered comparison when set, multiset comparison
// otherwise.
func chooseMatch(ordered bool) runner.MatchMode {
	if !ordered {
		return runner.MatchUnordered
	}
	return runner.MatchOrdered
}
// classifyFailure picks a triage bucket for a failed case.
//
//   - msg mentioning "unimplemented" or "not implemented" (case
//     insensitive) -> BucketNotYetLanded
//   - any required feature containing one of the out-of-scope tokens
//     (PROTO, JSON, GRAPH, PIPE, MATCH_RECOGNIZE) -> BucketFeatureOutOfScope
//   - everything else -> BucketEngineBug
func classifyFailure(tc TestCase, msg string) string {
	text := strings.ToLower(msg)
	for _, marker := range []string{"unimplemented", "not implemented"} {
		if strings.Contains(text, marker) {
			return BucketNotYetLanded
		}
	}
	outOfScope := []string{"PROTO", "JSON", "GRAPH", "PIPE", "MATCH_RECOGNIZE"}
	for _, feature := range tc.RequiredFeatures {
		for _, token := range outOfScope {
			if strings.Contains(feature, token) {
				return BucketFeatureOutOfScope
			}
		}
	}
	return BucketEngineBug
}
// finish stamps r with the wall-clock milliseconds elapsed since
// started and returns the updated copy.
func finish(r Result, started time.Time) Result {
	elapsed := time.Since(started)
	r.DurationMs = elapsed.Milliseconds()
	return r
}
// schemaColumns lists the field names of schema in declared order.
// A nil schema yields a nil slice.
func schemaColumns(schema *bqtypes.TableSchema) []string {
	if schema == nil {
		return nil
	}
	names := make([]string, 0, len(schema.Fields))
	for idx := range schema.Fields {
		names = append(names, schema.Fields[idx].Name)
	}
	return names
}
// projectIDFromBase extracts the path segment that follows the last
// "/projects/" marker in base. It returns "" when the marker is
// absent, and everything after the marker when no further "/" follows.
func projectIDFromBase(base string) string {
	const marker = "/projects/"
	at := strings.LastIndex(base, marker)
	if at == -1 {
		return ""
	}
	tail := base[at+len(marker):]
	if slash := strings.IndexByte(tail, '/'); slash >= 0 {
		return tail[:slash]
	}
	return tail
}
package runner
import (
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"gopkg.in/yaml.v3"
)
// ErrBaselineUpdateForbidden is returned when --update-baselines targets
// a hand-authored fixture that must not be bootstrapped from emulator
// output. It is a sentinel: compare with errors.Is. Within this file it
// is produced by refuseBaselineUpdate for fixtures whose
// BaselineUpdateForbidden method reports true.
var ErrBaselineUpdateForbidden = errors.New(
"update-baselines refused: protected fixture (see .cursor/rules/conformance-core-usage.mdc)")
// BaselineUpdateForbidden reports whether --update-baselines must
// refuse to rewrite this fixture. A fixture is protected when it is
// flagged VerifiedProduction or when its slash-normalized path
// contains a "/core_usage/" segment.
func (fx *Fixture) BaselineUpdateForbidden() bool {
	inCoreUsage := strings.Contains(filepath.ToSlash(fx.Path), "/core_usage/")
	return fx.VerifiedProduction || inCoreUsage
}
// refuseBaselineUpdate returns ErrBaselineUpdateForbidden for
// protected fixtures and nil for everything else.
func refuseBaselineUpdate(fx *Fixture) error {
	if !fx.BaselineUpdateForbidden() {
		return nil
	}
	return ErrBaselineUpdateForbidden
}
// rewriteFixtureRows decodes body as a gateway QueryResponse, converts
// its rows into the fixture's `expected.rows` shape (one map per row,
// keyed by column name or `col<i>` when the schema is too short), and
// writes the fixture back to disk. Used by --update-baselines to
// bootstrap a new fixture; protected fixtures are refused up front.
//
// The whole fixture is re-marshaled rather than patching only the
// `expected:` node. Per the note in `conformance/README.md`, comments
// above the fixture's `name:` survive that round trip while inline
// comments inside `expected:` are dropped.
func rewriteFixtureRows(fx *Fixture, body []byte) error {
	if refusal := refuseBaselineUpdate(fx); refusal != nil {
		return refusal
	}
	var resp bqtypes.QueryResponse
	if err := json.Unmarshal(body, &resp); err != nil {
		return fmt.Errorf("decode QueryResponse for baseline: %w", err)
	}
	names := schemaColumns(resp.Schema)
	captured := make([]map[string]any, len(resp.Rows))
	for ri := range resp.Rows {
		cells := resp.Rows[ri].F
		record := make(map[string]any, len(cells))
		for ci := range cells {
			record[positionalName(names, ci)] = baselineCellValue(cells[ci].V)
		}
		captured[ri] = record
	}
	// Only the values are replaced; the fixture keeps its match mode.
	fx.Expected = Expectation{Match: fx.Expected.Match, Rows: captured}
	return writeFixture(fx)
}
// rewriteFixtureError replaces the fixture's expectation with an
// `expected.error` block built from the observed response: the HTTP
// status becomes the pinned code and the message is the envelope's
// top-level `error.message`, falling back to the first
// `error.errors[].message` when the top-level field is empty.
// Protected fixtures are refused up front.
func rewriteFixtureError(fx *Fixture, status int, body []byte) error {
	if refusal := refuseBaselineUpdate(fx); refusal != nil {
		return refusal
	}
	var envelope struct {
		Error struct {
			Message string `json:"message"`
			Errors  []struct {
				Message string `json:"message"`
			} `json:"errors"`
		} `json:"error"`
	}
	// Decode failures are ignored on purpose: a body that is not a
	// JSON envelope simply produces an empty captured message.
	_ = json.Unmarshal(body, &envelope)

	captured := envelope.Error.Message
	if nested := envelope.Error.Errors; captured == "" && len(nested) > 0 {
		captured = nested[0].Message
	}
	pinned := &ExpectedError{Code: status, MessageContains: captured}
	fx.Expected = Expectation{Error: pinned}
	return writeFixture(fx)
}
// baselineCellValue maps a wire-format cell value onto the value that
// is written into the fixture's `expected.rows` block. As written it
// is an identity mapping; the branches spell out the intent:
//
//   - nil stays nil (rendered as YAML `null`),
//   - string scalars keep their string form,
//   - anything else (nested objects, REPEATED arrays) is passed
//     through untouched for the YAML encoder to render.
func baselineCellValue(v any) any {
	switch cell := v.(type) {
	case nil:
		return nil
	case string:
		return cell
	default:
		return v
	}
}
// writeFixture serializes the fixture to YAML and replaces the file at
// fx.Path using the "write-temp-then-rename" pattern: the data is
// first written to fx.Path+".tmp" (created with mode 0o600) and then
// renamed over the destination.
//
// The temp file is removed, best-effort, whenever a step after its
// creation fails, so an aborted update does not leave a stray ".tmp"
// next to the fixture.
func writeFixture(fx *Fixture) error {
	data, err := yaml.Marshal(fx)
	if err != nil {
		return fmt.Errorf("marshal fixture: %w", err)
	}
	tmp := fx.Path + ".tmp"
	if err := os.WriteFile(tmp, data, 0o600); err != nil {
		// WriteFile may have created or partially written the file
		// before failing; the removal error is irrelevant here.
		_ = os.Remove(tmp)
		return fmt.Errorf("write tmp %s: %w", tmp, err)
	}
	if err := os.Rename(tmp, fx.Path); err != nil {
		_ = os.Remove(tmp)
		return fmt.Errorf("rename %s -> %s: %w", tmp, fx.Path, err)
	}
	return nil
}
package runner
import (
"fmt"
"sort"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// rowDiff compares the gateway's rows against the fixture expectation
// according to the declared match mode and returns "" on PASS or a
// human-readable diff otherwise. An empty mode means MatchOrdered.
//
//   - MatchSchemaOnly: only the schema is checked (schemaDiff);
//     `expected.rows` is not compared.
//   - MatchOrdered: row i is compared against actualRows[i] with typed
//     cell comparison driven by the gateway-supplied schema.
//   - MatchUnordered: both sides are compared as multisets after
//     canonicalization; meant for queries without a guaranteed order.
//
// For the two row modes an optional `expected.schema` assertion runs
// first (schemaPreflight) and short-circuits on mismatch.
func rowDiff(exp Expectation, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
	mode := exp.Match
	if mode == "" {
		mode = MatchOrdered
	}
	if mode == MatchSchemaOnly {
		return schemaDiff(exp, schema)
	}
	if preflight := schemaPreflight(exp, schema); preflight != "" {
		return preflight
	}
	if mode == MatchOrdered {
		return orderedRowDiff(exp.Rows, schema, actualRows)
	}
	if mode == MatchUnordered {
		return unorderedRowDiff(exp.Rows, schema, actualRows)
	}
	// Loader validates mode, so this is unreachable at run time.
	return fmt.Sprintf("internal: unknown match mode %q", mode)
}
// schemaPreflight runs the opt-in schema assertion that precedes the
// row diff. Fixtures that declare `expected.schema:` get a typed,
// positional comparison against the gateway schema; fixtures without
// one pass silently.
func schemaPreflight(exp Expectation, actual *bqtypes.TableSchema) string {
	if len(exp.Schema) > 0 {
		return diffSchemaList(exp.Schema, actual, true)
	}
	return ""
}
// schemaDiff is the schema_only-mode entry point. An explicit
// `expected.schema:` declaration wins and is compared with types.
// Without one, the keys of `expected.rows[0]` serve as a names-only
// column template, so a fixture can pin "these columns come back"
// without spelling out each type.
func schemaDiff(exp Expectation, actual *bqtypes.TableSchema) string {
	if len(exp.Schema) > 0 {
		return diffSchemaList(exp.Schema, actual, true)
	}
	if len(exp.Rows) == 0 {
		// Loader rejects this combo, so this is only a safety net.
		return "schema_only: nothing to compare against (no schema:, no rows:)"
	}
	template := exp.Rows[0]
	names := make([]string, 0, len(template))
	for name := range template {
		names = append(names, name)
	}
	// Map iteration order is random; sort for a deterministic list.
	sort.Strings(names)
	columns := make([]ExpectedColumn, len(names))
	for i, name := range names {
		columns[i] = ExpectedColumn{Name: name}
	}
	return diffSchemaList(columns, actual, false)
}
// diffSchemaList compares a list of expected columns against the
// gateway's schema and returns "" on match or a rendered diff.
//
// checkTypes=true is the positional path: column i must match field i
// by name (case-insensitive) and, where the fixture declares them, by
// type (alias-normalized) and mode (case-insensitive).
//
// checkTypes=false is the names-only path used by the rows-shorthand
// for schema_only: the two sides are compared as case-insensitive
// name sets, independent of order. Both sides are sorted here with a
// case-insensitive key; sorting case-sensitively while comparing with
// EqualFold would misreport sets that differ only in letter case
// (e.g. {"Name","age"} vs {"name","Age"}).
func diffSchemaList(expected []ExpectedColumn, actual *bqtypes.TableSchema, checkTypes bool) string {
	if actual == nil || len(actual.Fields) == 0 {
		return fmt.Sprintf(
			"schema mismatch: expected %d columns, gateway returned no schema",
			len(expected))
	}
	if len(expected) != len(actual.Fields) {
		return renderSchemaDiff(expected, actual)
	}
	if !checkTypes {
		// Work on copies so the caller's slice is never reordered.
		wantNames := make([]string, 0, len(expected))
		for _, e := range expected {
			wantNames = append(wantNames, e.Name)
		}
		gotNames := make([]string, 0, len(actual.Fields))
		for _, f := range actual.Fields {
			gotNames = append(gotNames, f.Name)
		}
		sortFold := func(names []string) {
			sort.Slice(names, func(i, j int) bool {
				return strings.ToLower(names[i]) < strings.ToLower(names[j])
			})
		}
		sortFold(wantNames)
		sortFold(gotNames)
		for i := range wantNames {
			if !strings.EqualFold(wantNames[i], gotNames[i]) {
				return renderSchemaDiff(expected, actual)
			}
		}
		return ""
	}
	// Positional comparison, matching BigQuery's `schema.fields[]`
	// ordering semantics.
	for i, e := range expected {
		a := actual.Fields[i]
		if !strings.EqualFold(e.Name, a.Name) {
			return renderSchemaDiff(expected, actual)
		}
		if e.Type != "" && !schemaTypesEqual(e.Type, a.Type) {
			return renderSchemaDiff(expected, actual)
		}
		if e.Mode != "" && !strings.EqualFold(e.Mode, a.Mode) {
			return renderSchemaDiff(expected, actual)
		}
	}
	return ""
}
// schemaTypesEqual reports whether a fixture-declared type and a
// gateway REST schema type denote the same SQL type once both are run
// through normalizeSchemaType (which folds the INTEGER/FLOAT/BOOLEAN
// aliases onto their canonical spellings).
func schemaTypesEqual(expected, actual string) bool {
	want := normalizeSchemaType(expected)
	got := normalizeSchemaType(actual)
	return want == got
}
// normalizeSchemaType trims and upper-cases t, then folds the integer,
// float, and boolean aliases onto their canonical constants. Any other
// type name is returned in its trimmed upper-case form.
func normalizeSchemaType(t string) string {
	canon := strings.ToUpper(strings.TrimSpace(t))
	switch canon {
	case bqTypeINT64, bqTypeIntegerAlias:
		return bqTypeINT64
	case bqTypeFLOAT64, bqTypeFloatAlias:
		return bqTypeFLOAT64
	case bqTypeBool, bqTypeBooleanAlias:
		return bqTypeBool
	}
	return canon
}
// renderSchemaDiff lists the expected and the actual schema one below
// the other so the failing column or type is visible at a glance.
// Expected columns print as `name`, `name:TYPE`, or `name:TYPE:MODE`
// depending on what the fixture declared; actual fields always print
// their type and add the mode when it is set.
func renderSchemaDiff(expected []ExpectedColumn, actual *bqtypes.TableSchema) string {
	var out strings.Builder
	// typed renders the `name:TYPE[:MODE]` forms shared by both sides.
	typed := func(name, typ, mode string) {
		if mode == "" {
			fmt.Fprintf(&out, " %s:%s\n", name, strings.ToUpper(typ))
			return
		}
		fmt.Fprintf(&out, " %s:%s:%s\n", name, strings.ToUpper(typ),
			strings.ToUpper(mode))
	}

	out.WriteString("schema mismatch\nexpected:\n")
	for _, col := range expected {
		if col.Type == "" && col.Mode == "" {
			fmt.Fprintf(&out, " %s\n", col.Name)
		} else {
			typed(col.Name, col.Type, col.Mode)
		}
	}

	out.WriteString("actual:\n")
	if actual == nil || len(actual.Fields) == 0 {
		out.WriteString(" (no schema)\n")
		return out.String()
	}
	for _, field := range actual.Fields {
		typed(field.Name, field.Type, field.Mode)
	}
	return out.String()
}
// orderedRowDiff is the default comparison: expected row i must match
// actualRows[i] cell by cell, with typed comparison selected by the
// column's SQL type from the gateway-supplied schema. A row-count
// mismatch or the first non-matching row produces the expected/actual
// listing; a full match returns "".
func orderedRowDiff(expected []map[string]any, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
	cols := schemaColumns(schema)
	types := schemaTypes(schema)

	allMatch := len(expected) == len(actualRows)
	for i := 0; allMatch && i < len(expected); i++ {
		allMatch = rowMatchesTyped(expected[i], actualRows[i], cols, types, schema)
	}
	if allMatch {
		return ""
	}
	wantLines := renderExpectedRows(expected, cols, types)
	gotLines := renderActualRows(actualRows, cols, types)
	return unifiedDiff(wantLines, gotLines)
}
// unorderedRowDiff compares the two sides as multisets. Every row is
// rendered to its canonical one-line form and counted; rows whose
// counts disagree are reported as "missing" (short on the actual
// side) or "extra" (surplus on the actual side).
//
// Float tolerance is best-effort in this mode because rows are
// bucketed by their canonical string: per the original design note the
// canonicalizer rounds float64 values to 12 significant digits, so
// values within roughly 1e-12 relative tolerance bucket together. Use
// ordered mode for fixtures that need a tighter tolerance budget.
func unorderedRowDiff(expected []map[string]any, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
	cols, types := schemaColumns(schema), schemaTypes(schema)
	wantCounts, wantLines := groupExpected(expected, cols, types)
	gotCounts, gotLines := groupActual(actualRows, cols, types)
	if multisetsEqual(wantCounts, gotCounts) {
		return ""
	}
	missing, extra := diffMultiset(wantCounts, gotCounts)
	// Sort every section so the rendered diff is stable across runs.
	for _, section := range [][]string{wantLines, gotLines, missing, extra} {
		sort.Strings(section)
	}
	return renderUnorderedDiff(wantLines, gotLines, missing, extra)
}
// groupExpected canonicalizes the expected rows. It returns the
// line -> occurrence-count multiset together with the canonical lines
// in input order; the caller uses the latter for the
// "expected (multiset)" stanza.
func groupExpected(expected []map[string]any, cols, types []string) (map[string]int, []string) {
	lines := make([]string, len(expected))
	counts := make(map[string]int, len(expected))
	for i, row := range expected {
		lines[i] = canonicalExpectedRow(row, cols, types)
		counts[lines[i]]++
	}
	return counts, lines
}
// groupActual is the counterpart of groupExpected for engine-emitted
// rows: it returns the canonical-line multiset and the canonical
// lines in input order.
func groupActual(actual []bqtypes.Row, cols, types []string) (map[string]int, []string) {
	lines := make([]string, len(actual))
	counts := make(map[string]int, len(actual))
	for i := range actual {
		lines[i] = canonicalActualRow(actual[i], cols, types)
		counts[lines[i]]++
	}
	return counts, lines
}
// multisetsEqual reports whether two line -> count maps describe the
// same multiset: they must have the same number of distinct keys and
// agree on the count of every key.
func multisetsEqual(a, b map[string]int) bool {
	if len(a) != len(b) {
		return false
	}
	agreeing := 0
	for line, want := range a {
		if b[line] != want {
			break
		}
		agreeing++
	}
	return agreeing == len(a)
}
// diffMultiset computes the per-line deficit and surplus of act
// relative to exp. A line expected n times but seen m < n times is
// listed n-m times in missing; a line seen more often than expected is
// listed in extra once per surplus occurrence. Neither slice is
// sorted; the caller sorts for stable diff output.
func diffMultiset(exp, act map[string]int) (missing, extra []string) {
	for line, want := range exp {
		for deficit := want - act[line]; deficit > 0; deficit-- {
			missing = append(missing, line)
		}
	}
	for line, got := range act {
		for surplus := got - exp[line]; surplus > 0; surplus-- {
			extra = append(extra, line)
		}
	}
	return missing, extra
}
// renderUnorderedDiff builds the user-facing multiset diff. The
// "expected (multiset)" and "actual (multiset)" stanzas are always
// emitted (writeRowStanza substitutes a sentinel for empty sides);
// the "missing" and "extra" stanzas appear only when they have rows,
// so no empty headers are printed.
func renderUnorderedDiff(expLines, actLines, missing, extra []string) string {
	var out strings.Builder
	out.WriteString("unordered row mismatch\nexpected (multiset):\n")
	writeRowStanza(&out, expLines)
	out.WriteString("actual (multiset):\n")
	writeRowStanza(&out, actLines)

	// optional emits a header plus rows, or nothing for an empty list.
	optional := func(header string, rows []string) {
		if len(rows) == 0 {
			return
		}
		out.WriteString(header)
		for _, row := range rows {
			out.WriteString(" " + row + "\n")
		}
	}
	optional("missing (expected but not in actual):\n", missing)
	optional("extra (actual but not in expected):\n", extra)
	return out.String()
}
// writeRowStanza appends one side of the unordered diff to b: each
// line on its own indented row, or the explicit "(no rows)" sentinel
// when lines is empty, so an empty section is never rendered as
// nothing at all.
func writeRowStanza(b *strings.Builder, lines []string) {
	if len(lines) > 0 {
		for i := range lines {
			b.WriteString(" " + lines[i] + "\n")
		}
		return
	}
	b.WriteString(" (no rows)\n")
}
// rowMatchesTyped is the per-row comparator used by ordered mode. It
// reports true only when all of the following hold:
//
//   - the actual row has no more cells than the schema has columns,
//   - every key of expected names a schema column (so a fixture cannot
//     pin a column the engine never returned),
//   - for each schema column, the expected and actual cells are equal
//     under cellsEqual for that column's type and mode; a column
//     absent from expected is accepted only when the actual cell is
//     nil (both sides "missing" is treated as NULL).
func rowMatchesTyped(
	expected map[string]any,
	actual bqtypes.Row,
	cols []string,
	types []string,
	schema *bqtypes.TableSchema,
) bool {
	// Structural checks first: the gateway should never send cells the
	// schema does not enumerate, and stray expected keys are drift.
	if len(actual.F) > len(cols) {
		return false
	}
	for key := range expected {
		if !containsString(cols, key) {
			return false
		}
	}

	for idx, name := range cols {
		var got any
		if idx < len(actual.F) {
			got = actual.F[idx].V
		}
		want, present := expected[name]
		if !present {
			if got != nil {
				return false
			}
			continue
		}
		var typ, mode string
		if idx < len(types) {
			typ = types[idx]
		}
		if schema != nil && idx < len(schema.Fields) {
			mode = schema.Fields[idx].Mode
		}
		if !cellsEqual(want, got, typ, mode) {
			return false
		}
	}
	return true
}
// schemaColumns lists the schema's column names in declared order.
// A nil schema yields a nil slice.
func schemaColumns(schema *bqtypes.TableSchema) []string {
	if schema == nil {
		return nil
	}
	names := make([]string, 0, len(schema.Fields))
	for idx := range schema.Fields {
		names = append(names, schema.Fields[idx].Name)
	}
	return names
}
// schemaTypes lists the schema's column types in declared order,
// parallel to schemaColumns. A nil schema yields a nil slice, so
// callers must guard positional lookups with a length check.
func schemaTypes(schema *bqtypes.TableSchema) []string {
	if schema == nil {
		return nil
	}
	kinds := make([]string, 0, len(schema.Fields))
	for idx := range schema.Fields {
		kinds = append(kinds, schema.Fields[idx].Type)
	}
	return kinds
}
// positionalName resolves the column name for position i. When cols
// is absent or has no entry at i, the synthetic name `col<i>` is used
// instead.
func positionalName(cols []string, i int) string {
	if i >= len(cols) {
		return fmt.Sprintf("col%d", i)
	}
	return cols[i]
}
// canonicalExpectedRow renders an expected row into its canonical
// one-line form `{col=value, ...}` so the unordered bucketing can
// compare it byte-for-byte with canonicalActualRow's output.
//
// Schema columns come first, in schema order; a column the row does
// not provide is rendered as `col=<missing>`. Keys of r that the
// schema does not know follow in sorted order (canonicalized without
// a type) so the diff exposes the divergence.
func canonicalExpectedRow(r map[string]any, cols []string, types []string) string {
	parts := make([]string, 0, len(cols)+len(r))
	known := make(map[string]bool, len(cols))
	for idx, name := range cols {
		known[name] = true
		cell, present := r[name]
		if !present {
			parts = append(parts, name+"=<missing>")
			continue
		}
		typ := ""
		if idx < len(types) {
			typ = types[idx]
		}
		parts = append(parts, fmt.Sprintf("%s=%s", name, canonicalCell(cell, typ)))
	}

	var stray []string
	for key := range r {
		if !known[key] {
			stray = append(stray, key)
		}
	}
	sort.Strings(stray)
	for _, key := range stray {
		parts = append(parts, fmt.Sprintf("%s=%s", key, canonicalCell(r[key], "")))
	}
	return "{" + strings.Join(parts, ", ") + "}"
}
// canonicalActualRow renders one wire-format row into the
// `{col=value, ...}` form that canonicalExpectedRow emits. Cells are
// named positionally (falling back to `col<i>` beyond the schema) and
// canonicalized with the column's type when one is available.
func canonicalActualRow(r bqtypes.Row, cols []string, types []string) string {
	parts := make([]string, len(r.F))
	for idx := range r.F {
		typ := ""
		if idx < len(types) {
			typ = types[idx]
		}
		label := positionalName(cols, idx)
		parts[idx] = fmt.Sprintf("%s=%s", label, canonicalCell(r.F[idx].V, typ))
	}
	return "{" + strings.Join(parts, ", ") + "}"
}
// renderExpectedRows produces the expected side of the ordered-mode
// diff: one `row <i>: {...}` line per row, using the canonical row
// form so both sides of the diff line up.
func renderExpectedRows(rows []map[string]any, cols []string, types []string) []string {
	lines := make([]string, len(rows))
	for idx := range rows {
		lines[idx] = fmt.Sprintf("row %d: %s", idx, canonicalExpectedRow(rows[idx], cols, types))
	}
	return lines
}
// renderActualRows produces the actual side of the ordered-mode diff:
// one `row <i>: {...}` line per gateway row.
func renderActualRows(rows []bqtypes.Row, cols []string, types []string) []string {
	lines := make([]string, len(rows))
	for idx := range rows {
		lines[idx] = fmt.Sprintf("row %d: %s", idx, canonicalActualRow(rows[idx], cols, types))
	}
	return lines
}
// unifiedDiff renders the ordered-mode mismatch as an "expected:"
// section followed by an "actual:" section, each listing its lines
// indented (or "(no rows)" when empty). A full Myers diff is
// deliberately not used: fixture row counts are small and a plain
// listing of both sides is more legible than a hunk-grouped diff.
func unifiedDiff(expected, actual []string) string {
	var out strings.Builder
	section := func(title string, lines []string) {
		out.WriteString(title)
		if len(lines) == 0 {
			out.WriteString(" (no rows)\n")
			return
		}
		for _, line := range lines {
			out.WriteString(" " + line + "\n")
		}
	}
	section("expected:\n", expected)
	section("actual:\n", actual)
	return out.String()
}
package runner
import (
"encoding/json"
"fmt"
"slices"
"strings"
)
// CompareError is the exported entry point for error-envelope
// comparison. It checks the gateway's response (HTTP status plus body)
// against an expected.error block and returns an empty string on
// match or a human-readable mismatch description.
func CompareError(expected ExpectedError, status int, body []byte) string {
	diff := errorDiff(expected, status, body)
	return diff
}
// errorDiff compares the gateway's error response against an
// `expected.error` block. It returns "" on match or a human-readable
// message on mismatch.
//
// A non-zero expected code must equal the HTTP status. A non-empty
// MessageContains must be a substring of the envelope's top-level
// `error.message`, or of the first `error.errors[].message` when the
// top-level message is empty.
func errorDiff(expected ExpectedError, status int, body []byte) string {
	var envelope struct {
		Error struct {
			Code    int    `json:"code"`
			Message string `json:"message"`
			Status  string `json:"status"`
			Errors  []struct {
				Reason  string `json:"reason"`
				Message string `json:"message"`
			} `json:"errors"`
		} `json:"error"`
	}
	// Decode failures are ignored on purpose: a non-JSON body just
	// leaves the envelope empty and the checks below report on that.
	_ = json.Unmarshal(body, &envelope)

	if want := expected.Code; want != 0 && want != status {
		return fmt.Sprintf("error code: expected %d, got %d (body: %s)",
			want, status, snippet(body))
	}

	needle := expected.MessageContains
	if needle == "" {
		return ""
	}
	got := envelope.Error.Message
	if nested := envelope.Error.Errors; got == "" && len(nested) > 0 {
		got = nested[0].Message
	}
	if strings.Contains(got, needle) {
		return ""
	}
	return fmt.Sprintf(
		"error message: expected to contain %q, got %q (body: %s)",
		needle, got, snippet(body))
}
// snippet trims surrounding whitespace from a response body and
// truncates it to at most 240 bytes (plus a trailing "...") for
// inclusion in a diff message; bodies can be large and the diff
// should stay scannable.
//
// The cut never lands inside a multi-byte UTF-8 sequence: if the byte
// right after the limit is a continuation byte, the cut backs off to
// the start of that rune so the snippet stays valid UTF-8 whenever
// the body was.
func snippet(b []byte) string {
	const limit = 240
	s := strings.TrimSpace(string(b))
	if len(s) <= limit {
		return s
	}
	cut := limit
	// 0b10xxxxxx marks a UTF-8 continuation byte; s[cut] is the first
	// byte that would be dropped.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "..."
}
// containsString reports whether needle occurs in haystack.
func containsString(haystack []string, needle string) bool {
	return slices.Index(haystack, needle) >= 0
}
package runner
import (
"encoding/json"
"fmt"
"math"
"math/big"
"strconv"
"strings"
"time"
)
// floatRelEpsilon is the relative tolerance used when comparing
// FLOAT64 cells (see floatEqual). 1e-9 is loose enough to absorb the
// round-trip IEEE-754 noise around BigQuery's wire encoding: the
// gateway formats float64s with `strconv.FormatFloat(v, 'g', -1, 64)`,
// which is bit-exact, but the expected side passes through the YAML
// decoder + JSON unmarshal pair and therefore through
// `strconv.ParseFloat`.
const floatRelEpsilon = 1e-9
// cellsEqual is the type-aware cell equality predicate.
//
// NULL handling comes first: two NULLs are equal, NULL vs non-NULL is
// not. REPEATED columns are compared element-wise. Otherwise the
// comparator is chosen by the column's SQL type (trimmed, case
// insensitive): exact rational compare for integer/NUMERIC types,
// epsilon compare for floats, normalized compare for booleans, and
// instant compare for the date/time family. STRING, BYTES, and every
// unknown or empty type (e.g. STRUCT, or an absent schema) fall back
// to comparing the canonical string forms.
func cellsEqual(expected, actual any, fieldType, fieldMode string) bool {
	expNull, actNull := isNullExpected(expected), actual == nil
	switch {
	case expNull && actNull:
		return true
	case expNull || actNull:
		return false
	}
	if strings.EqualFold(strings.TrimSpace(fieldMode), "REPEATED") {
		return repeatedCellsEqual(expected, actual, fieldType)
	}
	switch strings.ToUpper(strings.TrimSpace(fieldType)) {
	case bqTypeINT64, bqTypeIntegerAlias, "NUMERIC", "BIGNUMERIC":
		return numericEqual(expected, actual)
	case bqTypeFLOAT64, bqTypeFloatAlias:
		return floatEqual(expected, actual)
	case bqTypeBool, bqTypeBooleanAlias:
		return boolEqual(expected, actual)
	case "TIMESTAMP", "DATE", "DATETIME", "TIME":
		return timeEqual(expected, actual)
	}
	// STRING, BYTES, and anything unrecognized share the stringy
	// compare so existing fixtures keep their behavior.
	return stringForm(expected) == stringForm(actual)
}
// repeatedCellsEqual compares REPEATED column values element by
// element. Expected YAML arrays (`["1","2"]`) and BigQuery REST wire
// arrays (`[{"v":"1"},{"v":"2"}]`) are both flattened by
// normalizeArrayElements before each pair is compared with cellsEqual
// under the element type. If either side is not an array, the
// canonical string forms are compared instead.
func repeatedCellsEqual(expected, actual any, elemType string) bool {
	wantElems, wantIsArray := normalizeArrayElements(expected)
	gotElems, gotIsArray := normalizeArrayElements(actual)
	if !(wantIsArray && gotIsArray) {
		return stringForm(expected) == stringForm(actual)
	}
	if len(wantElems) != len(gotElems) {
		return false
	}
	for idx, want := range wantElems {
		if !cellsEqual(want, gotElems[idx], elemType, "") {
			return false
		}
	}
	return true
}
// normalizeArrayElements flattens an array value into its elements.
// It returns ok=false when v is not a []any. Elements shaped like the
// REST wire wrapper (a map with a "v" key) are replaced by the
// wrapped value; all other elements are kept as they are.
func normalizeArrayElements(v any) ([]any, bool) {
	elems, isArray := v.([]any)
	if !isArray {
		return nil, false
	}
	flat := make([]any, 0, len(elems))
	for _, elem := range elems {
		unwrapped := elem
		if wrapper, isMap := elem.(map[string]any); isMap {
			if inner, hasV := wrapper["v"]; hasV {
				unwrapped = inner
			}
		}
		flat = append(flat, unwrapped)
	}
	return flat, true
}
// isNullExpected reports whether a YAML-decoded expected value is the
// canonical NULL marker, i.e. `nil` (YAML `null`). The literal string
// "NULL" is an ordinary string and does not count.
func isNullExpected(v any) bool {
	if v != nil {
		return false
	}
	return true
}
// numericEqual compares two values as exact rationals. INT64,
// NUMERIC, and BIGNUMERIC all use this path, so a YAML `1` matches the
// wire `"1"` regardless of how either side spelled it. A side that
// cannot be converted by toRat makes the comparison false.
func numericEqual(expected, actual any) bool {
	want := toRat(expected)
	if want == nil {
		return false
	}
	got := toRat(actual)
	if got == nil {
		return false
	}
	return want.Cmp(got) == 0
}
// toRat best-effort parses a value into a math/big.Rat. Integers,
// floats, and strings of either (anything big.Rat.SetString accepts,
// after trimming whitespace) are supported. Everything else returns
// nil so cellsEqual can flag a type drift instead of a silent
// zero-vs-zero pass.
//
// Non-finite floats (NaN, ±Inf) also return nil: big.Rat.SetFloat64
// reports them by returning nil while leaving the receiver at zero,
// so its result must be used rather than the receiver. Otherwise an
// infinite expected value would compare equal to "0".
func toRat(v any) *big.Rat {
	switch x := v.(type) {
	case nil:
		return nil
	case *big.Rat:
		return x
	case int:
		return new(big.Rat).SetInt64(int64(x))
	case int32:
		return new(big.Rat).SetInt64(int64(x))
	case int64:
		return new(big.Rat).SetInt64(x)
	case uint:
		return new(big.Rat).SetUint64(uint64(x))
	case uint32:
		return new(big.Rat).SetUint64(uint64(x))
	case uint64:
		return new(big.Rat).SetUint64(x)
	case float32:
		// nil for NaN/±Inf, see the function comment.
		return new(big.Rat).SetFloat64(float64(x))
	case float64:
		return new(big.Rat).SetFloat64(x)
	case string:
		s := strings.TrimSpace(x)
		if s == "" {
			return nil
		}
		if r, ok := new(big.Rat).SetString(s); ok {
			return r
		}
		return nil
	}
	return nil
}
// floatEqual compares two values as float64 with the relative
// tolerance floatRelEpsilon. A side that toFloat cannot convert makes
// the comparison false. NaN equals only NaN. Exactly equal values
// (including equal infinities) match immediately; otherwise the
// difference is normalized by the larger magnitude, with a guard so a
// zero magnitude never becomes a divisor.
func floatEqual(expected, actual any) bool {
	want, wantOK := toFloat(expected)
	got, gotOK := toFloat(actual)
	if !(wantOK && gotOK) {
		return false
	}
	wantNaN, gotNaN := math.IsNaN(want), math.IsNaN(got)
	if wantNaN || gotNaN {
		return wantNaN && gotNaN
	}
	if want == got {
		return true
	}
	delta := math.Abs(want - got)
	scale := math.Max(math.Abs(want), math.Abs(got))
	if scale == 0 {
		return delta <= floatRelEpsilon
	}
	return delta/scale <= floatRelEpsilon
}
// toFloat parses a value into float64 best-effort and reports whether
// the conversion succeeded. Native float and integer kinds convert
// directly; strings are trimmed and parsed with strconv.ParseFloat
// (BigQuery's wire format encodes everything as a string). nil and
// every other type yield ok=false.
//
// uint32 is accepted so the set of integer kinds agrees with toRat.
func toFloat(v any) (float64, bool) {
	switch x := v.(type) {
	case nil:
		return 0, false
	case float64:
		return x, true
	case float32:
		return float64(x), true
	case int:
		return float64(x), true
	case int32:
		return float64(x), true
	case int64:
		return float64(x), true
	case uint:
		return float64(x), true
	case uint32:
		return float64(x), true
	case uint64:
		return float64(x), true
	case string:
		f, err := strconv.ParseFloat(strings.TrimSpace(x), 64)
		if err != nil {
			return 0, false
		}
		return f, true
	}
	return 0, false
}
// boolEqual compares two values after reducing each to a canonical
// bool with toBool. This reconciles a real bool (as produced by the
// YAML decoder) with a string form (as seen on the wire). If either
// side cannot be interpreted as a bool the pair is reported unequal.
func boolEqual(expected, actual any) bool {
	want, wantOK := toBool(expected)
	got, gotOK := toBool(actual)
	return wantOK && gotOK && want == got
}
// toBool reduces a value to a bool. The second result reports whether
// the value was recognized:
//
//   - bool values pass through unchanged;
//   - int, int32, and int64 map zero to false and anything else to true;
//   - strings are trimmed and lowercased, then matched against
//     "true"/"t"/"1" and "false"/"f"/"0".
//
// Every other input (including unrecognized strings) yields
// (false, false).
func toBool(v any) (bool, bool) {
	switch val := v.(type) {
	case bool:
		return val, true
	case int:
		return val != 0, true
	case int32:
		return val != 0, true
	case int64:
		return val != 0, true
	case string:
		word := strings.ToLower(strings.TrimSpace(val))
		if word == "true" || word == "t" || word == "1" {
			return true, true
		}
		if word == "false" || word == "f" || word == "0" {
			return false, true
		}
	}
	return false, false
}
// timeEqual converts both sides with toTime and reports whether they
// denote the same instant (time.Time.Equal, so differing zones that
// name the same moment still match). The accepted input shapes are
// whatever toTime recognizes; a side that cannot be converted makes
// the comparison fail.
func timeEqual(expected, actual any) bool {
	want, wantOK := toTime(expected)
	got, gotOK := toTime(actual)
	return wantOK && gotOK && want.Equal(got)
}
// timeFormats is the ordered list of time.Parse layouts that
// parseTimestampString tries before falling back to the numeric
// Unix-seconds forms. The first layout that parses wins.
var timeFormats = []string{
// RFC 3339 with and without fractional seconds.
time.RFC3339Nano,
time.RFC3339,
// ISO-style `T` separator without a zone.
"2006-01-02T15:04:05.999999999",
"2006-01-02T15:04:05",
// SQL-style space separator, optionally with a zone abbreviation.
"2006-01-02 15:04:05.999999999 MST",
"2006-01-02 15:04:05.999999999",
"2006-01-02 15:04:05",
// Date only.
"2006-01-02",
// Time of day only.
"15:04:05.999999999",
"15:04:05",
}
// toTime converts a value into a time.Time. The second result is
// false when the value is not recognized.
//
//   - time.Time passes through unchanged;
//   - int / int64 are interpreted as Unix seconds (UTC);
//   - float64 is interpreted as Unix seconds with the fractional part
//     converted to nanoseconds (UTC);
//   - strings are delegated to parseTimestampString.
//
// nil and every other type yield the zero time and false.
func toTime(v any) (time.Time, bool) {
	switch val := v.(type) {
	case time.Time:
		return val, true
	case int:
		return time.Unix(int64(val), 0).UTC(), true
	case int64:
		return time.Unix(val, 0).UTC(), true
	case float64:
		whole := int64(val)
		frac := val - float64(whole)
		return time.Unix(whole, int64(frac*1e9)).UTC(), true
	case string:
		parsed, ok := parseTimestampString(val)
		if ok {
			return parsed, true
		}
	}
	return time.Time{}, false
}
// parseTimestampString turns a textual timestamp into a UTC
// time.Time. After trimming whitespace it tries, in order:
//
//  1. every layout in timeFormats;
//  2. the dotted `<sec>.<frac>` Unix form (parseUnixSecondsString);
//  3. a plain base-10 integer of Unix seconds.
//
// It returns ok=false for an empty string or when nothing matches.
// Kept separate from toTime so that function stays flat.
func parseTimestampString(raw string) (time.Time, bool) {
	trimmed := strings.TrimSpace(raw)
	if len(trimmed) == 0 {
		return time.Time{}, false
	}
	for i := range timeFormats {
		parsed, err := time.Parse(timeFormats[i], trimmed)
		if err != nil {
			continue
		}
		return parsed.UTC(), true
	}
	if ts, ok := parseUnixSecondsString(trimmed); ok {
		return ts, true
	}
	sec, err := strconv.ParseInt(trimmed, 10, 64)
	if err != nil {
		return time.Time{}, false
	}
	return time.Unix(sec, 0).UTC(), true
}
// parseUnixSecondsString parses the dotted `<sec>.<frac>` Unix-seconds
// TIMESTAMP encoding without going through float64 (which would drop
// precision past microseconds).
//
// The integer part may carry a sign; the fractional part must consist
// of ASCII digits only (it may be empty, meaning zero). Digits beyond
// nanosecond precision are truncated. For a negative value the
// fraction is subtracted, so "-1.5" is 1.5 seconds before the epoch.
//
// Returns ok=false when the input has no dot, the integer part does
// not parse, or the fraction contains a non-digit.
func parseUnixSecondsString(s string) (time.Time, bool) {
	whole, frac, ok := strings.Cut(s, ".")
	if !ok {
		return time.Time{}, false
	}
	sec, err := strconv.ParseInt(whole, 10, 64)
	if err != nil {
		return time.Time{}, false
	}
	// Reject signs, exponents, and other junk in the fraction; ParseInt
	// alone would happily accept something like "-5".
	for i := 0; i < len(frac); i++ {
		if frac[i] < '0' || frac[i] > '9' {
			return time.Time{}, false
		}
	}
	// Normalize to exactly nine digits (nanoseconds).
	if len(frac) > 9 {
		frac = frac[:9]
	} else {
		frac += strings.Repeat("0", 9-len(frac))
	}
	nsec, err := strconv.ParseInt(frac, 10, 64)
	if err != nil {
		return time.Time{}, false
	}
	// The sign lives on the integer part ("-0.5" parses to sec == 0), so
	// inspect the text rather than sec. time.Unix accepts nsec outside
	// [0, 999999999] and normalizes it.
	if strings.HasPrefix(whole, "-") {
		nsec = -nsec
	}
	return time.Unix(sec, nsec).UTC(), true
}
// stringForm returns the canonical scalar string for the diff
// renderer. STRING/BYTES compare on this literal form, with the
// NULL sentinel kept distinct from the literal string "NULL".
func stringForm(v any) string {
if v == nil {
return "<NULL>"
}
switch x := v.(type) {
case string:
return x
case bool:
if x {
return boolLiteralTrue
}
return boolLiteralFalse
case int, int32, int64, uint, uint32, uint64:
return fmt.Sprintf("%d", x)
case float32, float64:
return fmt.Sprintf("%v", x)
default:
b, err := json.Marshal(v)
if err != nil {
return fmt.Sprintf("%v", v)
}
return string(b)
}
}
// canonicalCell renders a single cell into type-normalized text. Both
// sides of the unordered multiset comparison bucket on this string, so
// equivalent spellings ("1" vs 1, "true" vs true, 1.0 vs "1.0", ...)
// must collapse to the same output.
//
// fieldType is matched case-insensitively after trimming. nil always
// renders as "<NULL>". When the value cannot be coerced to the
// column's type, or the type is not one handled here, the result is
// stringForm(v).
func canonicalCell(v any, fieldType string) string {
	if v == nil {
		return "<NULL>"
	}
	kind := strings.ToUpper(strings.TrimSpace(fieldType))
	switch kind {
	case bqTypeINT64, bqTypeIntegerAlias, "NUMERIC", "BIGNUMERIC":
		// Exact rational text, so "1", 1 and "1.0" agree.
		rat := toRat(v)
		if rat != nil {
			return rat.RatString()
		}
	case bqTypeFLOAT64, bqTypeFloatAlias:
		num, ok := toFloat(v)
		if ok {
			// 12 significant digits absorbs ~1e-12 relative
			// drift; ordered-mode epsilon still applies for
			// tighter tolerances.
			return strconv.FormatFloat(num, 'g', 12, 64)
		}
	case bqTypeBool, bqTypeBooleanAlias:
		flag, ok := toBool(v)
		if ok && flag {
			return boolLiteralTrue
		}
		if ok {
			return boolLiteralFalse
		}
	case "TIMESTAMP", "DATE", "DATETIME", "TIME":
		instant, ok := toTime(v)
		if ok {
			return instant.UTC().Format(time.RFC3339Nano)
		}
	}
	return stringForm(v)
}
// Package runner is the engine half of the conformance harness:
// fixture loading, profile resolution, REST execution, and row /
// error diffing. The CLI entry point lives in
// `conformance/cmd/runner`; tests that exercise the runner against a
// real `emulator_main` subprocess live alongside the CLI behind the
// `//go:build integration` tag.
//
// The package is structured so the parsing and diff logic can be unit
// tested without a running engine: see `runner_test.go`. The harness
// half (`harness.go`) is the only code that touches subprocesses.
package runner
import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"gopkg.in/yaml.v3"
)
// Fixture is the in-memory shape of a single YAML conformance file.
//
// See `conformance/README.md` for the worked schema. Every field on
// the wire is optional except `name` and `query`; the loader supplies
// safe defaults for the others so a fixture writer only has to spell
// out the fields they need.
type Fixture struct {
// Name identifies the fixture in logs and diff output. By
// convention it matches the YAML filename (without extension)
// so a divergence between the two is easy to spot. Required.
Name string `yaml:"name"`
// Description is free-form prose that gets echoed under the
// fixture title in `--output text`. Optional.
Description string `yaml:"description,omitempty"`
// Profiles is the runtime matrix the fixture applies to. Empty
// means the default profile set (today: a single local-
// execution coordinator over DuckDB storage). Unknown profile
// names are an error at load time so a typo is caught
// immediately rather than masked as "fixture ran on zero
// profiles".
Profiles []string `yaml:"profiles,omitempty"`
// ProjectID is the BigQuery project the runner POSTs catalog +
// query work against. Defaults to `proj-conformance-<name>`
// (with the name sanitized) so fixtures stay isolated even when
// they share an emulator (via `--connect`).
ProjectID string `yaml:"project_id,omitempty"`
// DatasetID is a documentation hint; the runner does not
// auto-create it. Use a `setup` step with `dataset: <id>` to
// actually create the dataset.
DatasetID string `yaml:"dataset_id,omitempty"`
// Setup runs in order before `Query`. Each step is dispatched
// on which discriminator field is set (`dataset`, `table`,
// `sql`, ...); see SetupStep for the full list.
Setup []SetupStep `yaml:"setup,omitempty"`
// Query is the SQL the runner POSTs to /queries and asserts on.
// Required. For DML-only fixtures, prefer encoding the assertion
// as a SELECT after the mutation so the diff stays declarative.
Query string `yaml:"query"`
// DefaultDataset is an optional request-level default dataset ID
// sent as `defaultDataset.datasetId` on jobs.query. Unblocks bare
// table names (`SELECT * FROM t`) matching production BigQuery
// client behavior.
DefaultDataset string `yaml:"default_dataset,omitempty"`
// VerifiedProduction marks fixtures whose expected rows/errors were
// hand-authored from production BigQuery semantics. The runner
// refuses --update-baselines rewrites on such fixtures.
VerifiedProduction bool `yaml:"verified_production,omitempty"`
// OptionalDependencies lists host packages (e.g. Python modules) that
// must be importable for the fixture to run. When any are missing the
// runner reports SKIP instead of FAIL so host-dependent fixtures stay
// green on machines without optional deps installed.
OptionalDependencies []string `yaml:"optional_dependencies,omitempty"`
// Expected pins either the expected row set or the expected
// HTTP error envelope; see Expectation for the exact rules the
// loader enforces.
Expected Expectation `yaml:"expected"`
// Path is filled in by the loader (loadBytes); not parsed from YAML.
Path string `yaml:"-"`
}
// SetupStep is one entry in `Fixture.Setup`. The discriminator fields
// are mutually exclusive: `Dataset` for a dataset create, `Table` for
// a table create, `Rows` for a `tabledata.insertAll` seed, `SQL` for
// a query (typically DML or DDL), `RowAccessPolicy` for a row-access
// policy, `ColumnGovernance` for column mask metadata, and
// `ConnectionFixture` for EXTERNAL_QUERY snapshots. The loader
// rejects steps that set more than one or none.
type SetupStep struct {
// Dataset is the dataset ID to create. The runner POSTs a
// minimal `{datasetReference, location:"US"}` body against
// `/bigquery/v2/projects/<projectId>/datasets`.
Dataset string `yaml:"dataset,omitempty"`
// Table is the table to create. The runner POSTs against
// `/bigquery/v2/projects/<projectId>/datasets/<datasetId>/tables`.
Table *TableSetup `yaml:"table,omitempty"`
// Rows seeds a previously created table by POSTing
// `tabledata.insertAll`. The streaming-insert path is the right
// tool when the fixture wants to assert the streaming side of
// the wire (separate from the DML envelope); INSERT VALUES /
// UPDATE / DELETE now land via the local DML executor
// (`backend/engine/semantic/dml/`), so fixtures that just
// want seed data may use either `rows:` or an `sql:` step.
Rows *RowsSetup `yaml:"rows,omitempty"`
// SQL is a query the runner POSTs to /queries. Errors from the
// gateway abort the fixture (counted as runner-internal failure,
// not a fixture mismatch). Use this for MERGE, CREATE TABLE,
// DROP TABLE, and the INSERT VALUES / UPDATE / DELETE shapes
// now landed on the local DML executor (see `Rows` for the
// streaming-insert alternative). A whitespace-only value counts
// as unset during validation.
SQL string `yaml:"sql,omitempty"`
// RowAccessPolicy creates a row-access policy via the REST API.
RowAccessPolicy *RowAccessPolicySetup `yaml:"row_access_policy,omitempty"`
// ColumnGovernance sets column-level masking metadata via the
// engine catalog RPC (through the gateway's tables.patch hook).
ColumnGovernance *ColumnGovernanceSetup `yaml:"column_governance,omitempty"`
// ConnectionFixture seeds EXTERNAL_QUERY snapshots under data_dir.
ConnectionFixture *ConnectionFixtureSetup `yaml:"connection_fixture,omitempty"`
}
// RowsSetup describes a `tabledata.insertAll` setup step. Each entry
// in `Rows` is a column-name -> cell-value map, matching the same
// shape as `Expectation.Rows`. Step validation requires all three
// fields: a dataset, a table, and at least one row.
type RowsSetup struct {
// Dataset is the ID of the dataset that holds the target table.
Dataset string `yaml:"dataset"`
// Table is the ID of the table to seed.
Table string `yaml:"table"`
// Rows is the non-empty list of rows to insert.
Rows []map[string]any `yaml:"rows"`
}
// TableSetup describes a table to create via REST. The schema is the
// usual BigQuery TableFieldSchema shape. When External is set the
// runner POSTs an external table (Google Sheets, GCS, ...). When View
// is set the runner POSTs a logical view (a `view.query` body, the
// shape the Python/Java/Go clients send for `create_table(Table)` with
// a view definition) so fixtures can exercise the REST view-creation
// path distinctly from `CREATE VIEW` DDL.
//
// Validation requires Dataset and ID, plus at least one of Schema,
// External, or View.
type TableSetup struct {
// Dataset is the ID of the dataset the table is created in. Required.
Dataset string `yaml:"dataset"`
// ID is the table ID. Required.
ID string `yaml:"id"`
// Schema lists the table's columns. May be empty only when
// External or View is set.
Schema []SchemaColumn `yaml:"schema,omitempty"`
// External, when non-nil, makes this an external table.
External *ExternalTableSetup `yaml:"external,omitempty"`
// View, when non-nil, makes this a logical view.
View *ViewTableSetup `yaml:"view,omitempty"`
}
// ExternalTableSetup is the externalDataConfiguration block for setup.
type ExternalTableSetup struct {
// SourceFormat is required; table validation rejects an empty value.
SourceFormat string `yaml:"source_format"`
// SourceURIs lists the external data locations.
// NOTE(review): not validated at load time — confirm whether an
// empty list is meaningful.
SourceURIs []string `yaml:"source_uris"`
// Autodetect is optional and defaults to false when omitted.
Autodetect bool `yaml:"autodetect,omitempty"`
}
// ViewTableSetup is the `view` block for a tables.insert setup step.
// Only the defining query is modeled; the emulator infers the view
// schema from it (matching production BigQuery, which lets clients
// omit the schema on a view insert).
type ViewTableSetup struct {
// Query is the view's defining SQL. Required: table validation
// rejects an empty or whitespace-only value.
Query string `yaml:"query"`
}
// ConnectionFixtureSetup copies committed connection snapshots into the
// emulator data_dir before EXTERNAL_QUERY runs. Both fields are
// required by setup-step validation.
type ConnectionFixtureSetup struct {
// ConnectionID names the connection the snapshots belong to.
ConnectionID string `yaml:"connection_id"`
// SourceDir is the directory holding the committed snapshots.
// NOTE(review): what the path is resolved against is not visible
// here — confirm in the harness.
SourceDir string `yaml:"source_dir"`
}
// validate checks a table setup step and returns the first problem
// found, in this order: missing dataset, missing id, no schema /
// external / view definition, external block without a source
// format, view block without a (non-blank) query. It returns nil
// when the step is well-formed.
func (t *TableSetup) validate() error {
	switch {
	case t.Dataset == "":
		return errors.New("table.dataset is required")
	case t.ID == "":
		return errors.New("table.id is required")
	case len(t.Schema) == 0 && t.External == nil && t.View == nil:
		return errors.New("table.schema must list at least one column (or set table.external / table.view)")
	case t.External != nil && t.External.SourceFormat == "":
		return errors.New("table.external.source_format is required")
	case t.View != nil && strings.TrimSpace(t.View.Query) == "":
		return errors.New("table.view.query is required")
	}
	return nil
}
// SchemaColumn maps directly to `bqtypes.TableFieldSchema`. We keep
// this as a runner-local struct so the YAML field names (lower-snake)
// stay decoupled from the wire-shape Go struct.
type SchemaColumn struct {
// Name is the column name.
Name string `yaml:"name"`
// Type is the column's SQL type name.
Type string `yaml:"type"`
// Mode is the optional column mode.
// NOTE(review): presumably NULLABLE / REQUIRED / REPEATED as in
// BigQuery — confirm against the wire struct.
Mode string `yaml:"mode,omitempty"`
// Description is optional free-form column documentation.
Description string `yaml:"description,omitempty"`
// Fields holds nested columns, using the same shape recursively.
Fields []SchemaColumn `yaml:"fields,omitempty"`
// PolicyTags is an optional list of policy tags for the column.
PolicyTags []string `yaml:"policy_tags,omitempty"`
}
// RowAccessPolicySetup describes a rowAccessPolicies.insert setup step.
// Setup-step validation requires Dataset, Table, PolicyID, and
// FilterPredicate; Grantees is optional.
type RowAccessPolicySetup struct {
// Dataset is the ID of the dataset holding the protected table.
Dataset string `yaml:"dataset"`
// Table is the ID of the table the policy applies to.
Table string `yaml:"table"`
// PolicyID identifies the policy being created.
PolicyID string `yaml:"policy_id"`
// FilterPredicate is the policy's row filter expression.
FilterPredicate string `yaml:"filter_predicate"`
// Grantees optionally lists the principals the policy grants to.
Grantees []string `yaml:"grantees,omitempty"`
}
// ColumnGovernanceSetup sets column mask metadata on an existing table.
// Setup-step validation requires Dataset, Table, Column, and MaskKind;
// PolicyTag is optional.
type ColumnGovernanceSetup struct {
// Dataset is the ID of the dataset holding the table.
Dataset string `yaml:"dataset"`
// Table is the ID of the table that owns the column.
Table string `yaml:"table"`
// Column is the name of the column to govern.
Column string `yaml:"column"`
// MaskKind selects the masking behavior.
// NOTE(review): the accepted values are not visible here — confirm
// against the engine catalog RPC.
MaskKind string `yaml:"mask_kind"`
// PolicyTag is an optional policy tag to associate with the column.
PolicyTag string `yaml:"policy_tag,omitempty"`
}
// Expectation captures one of two assertion modes: an expected row
// set (`Rows`) or an expected error envelope (`Error`). `Error` can
// never be combined with `Rows` or `Schema`. For `ordered` and
// `unordered` matches at least one of `Rows` or `Error` must be set;
// for `Match==schema_only` a `Schema` block alone is also sufficient.
type Expectation struct {
// Match controls how Rows are compared against the gateway's
// response. One of `ordered` (default), `unordered`, or
// `schema_only`. See `conformance/README.md` for the matching
// semantics each mode implies.
Match MatchMode `yaml:"match,omitempty"`
// Schema is the optional list of expected output columns. The
// diff engine uses it for two things:
//
// 1. `schema_only` mode: required for the schema-vs-schema
// assertion (the engine compares this list against the
// `QueryResponse.schema` returned by the gateway).
// 2. `ordered` / `unordered` modes: advisory, used to
// double-check the column set the query actually returned
// before diffing rows. When omitted, the runner trusts
// the gateway-supplied schema.
Schema []ExpectedColumn `yaml:"schema,omitempty"`
// Rows is the expected row set for a successful query. Each
// row is a column-name -> cell-value map. The diff engine
// normalizes both sides per the column's SQL type from the
// gateway's `QueryResponse.schema` (so INT64 `1` matches
// `"1"`, FLOAT64 compares with a relative epsilon, NULL stays
// distinct from the literal string "NULL", etc.). See
// `conformance/README.md` for the full type table.
//
// Ignored when `Match==schema_only`.
Rows []map[string]any `yaml:"rows,omitempty"`
// Error pins the expected error envelope when the fixture
// intends to verify a failure mode (e.g. invalid SQL).
Error *ExpectedError `yaml:"error,omitempty"`
// Route is the canonical lowercase-snake `Disposition` the
// coordinator's `RouteClassifier` MUST have chosen for this
// fixture (one of `duckdb_native`, `duckdb_rewrite`,
// `duckdb_udf`, `semantic_executor`, `control_op`,
// `local_stub`, `unsupported`; mirrors
// `backend/engine/disposition.cc::DispositionToString`).
// Compared against the response's
// `Job.statistics.query.emulatorRoute` (loopback-only field
// gated by `gateway/middleware/loopback.go`).
//
// For Storage Read / Write fixtures and other RPC families that
// don't go through `LocalCoordinatorEngine`, leave this empty
// and use `RouteStrict=false` with an empty `RouteAllowlist`
// (the runner then skips the route assertion entirely; see
// the package doc above the field set for the rationale).
//
// Ownership: `docs/ENGINE_POLICY.md`.
Route string `yaml:"route,omitempty"`
// RouteAllowlist enumerates the route names the runner accepts
// when `RouteStrict=false`. Useful for shapes that are
// deliberately flexible between, say, `duckdb_native` and
// `duckdb_rewrite` because the transpiler's choice is an
// implementation detail (not a fixture-meaningful behavior).
//
// Empty + `RouteStrict=false` AND a non-empty `Route` is the
// "document-the-intent" pattern used by error-path fixtures:
// the engine returns before `EmitTrailers` fires so an actual
// route never reaches the runner, but the fixture writer can
// still pin `route: unsupported` for the matrix walker. The
// runner treats actual=="" as a skip in relaxed mode.
//
// When `RouteStrict=true` (the default) a non-empty
// `RouteAllowlist` is rejected at load time; the runner asserts
// the route equals `Route` exactly. Spelling validation: every
// entry must be one of the canonical disposition names; unknown
// entries are a fixture-load error so a typo can't accidentally
// widen the allowlist.
RouteAllowlist []string `yaml:"route_allowlist,omitempty"`
// RouteStrict toggles between exact-match (default) and
// `RouteAllowlist`-membership comparison. Defaults to `true`
// when omitted via the `*bool` indirection (a missing key is
// strict, an explicit `false` opts in to the allowlist mode).
// The pointer type distinguishes a missing key from an explicit
// `false`; read it through RouteStrictDefault.
RouteStrict *bool `yaml:"route_strict,omitempty"`
}
// MatchMode is the row-comparison strategy declared by a fixture.
// Default is MatchOrdered: the loader rewrites an empty value to
// MatchOrdered and rejects anything outside the constants below.
type MatchMode string
const (
// MatchOrdered (the default) compares rows pairwise in
// declaration order. Use `ORDER BY` in the fixture query so the
// comparison stays deterministic.
MatchOrdered MatchMode = "ordered"
// MatchUnordered compares rows as a multiset; the diff engine
// canonicalizes every row to a type-normalized string and
// asserts the two multisets are equal. Useful when the query
// does not declare an ORDER BY and the storage engine returns
// rows in implementation-defined order (DuckDB, parallel
// scans, etc.).
MatchUnordered MatchMode = "unordered"
// MatchSchemaOnly ignores `Rows` entirely and only validates
// the column names + types returned by the query. Useful for
// queries whose row values are non-deterministic (CURRENT_*,
// generated IDs) and for "dryRun" style smoke checks.
MatchSchemaOnly MatchMode = "schema_only"
)
// ExpectedColumn is one entry in `Expectation.Schema`. The Type field
// is compared case-insensitively against the gateway's wire-format
// type (`STRING`, `INT64`, `FLOAT64`, etc.) so a fixture pinning
// `INTEGER` will still match a response advertising `INT64`.
type ExpectedColumn struct {
// Name is the expected column name.
Name string `yaml:"name"`
// Type is the expected SQL type name.
Type string `yaml:"type"`
// Mode is the optional expected column mode.
// NOTE(review): whether an empty Mode skips the mode assertion is
// decided by the diff engine — confirm there.
Mode string `yaml:"mode,omitempty"`
}
// ExpectedError captures the assertion vocabulary for the error path.
// Each field is individually optional and the runner asserts only on
// what is set, but the loader rejects an error block that sets
// neither.
type ExpectedError struct {
// Code is the expected HTTP status code, e.g. 400 / 404 / 501.
// Zero means "do not assert on the status code". A fixture
// must set at least one of Code or MessageContains.
Code int `yaml:"code,omitempty"`
// MessageContains is a substring the runner expects to find
// in the BigQuery error envelope's top-level `error.message`
// field (with a fallback to `error.errors[0].message`).
MessageContains string `yaml:"message_contains,omitempty"`
}
// defaultProfiles is the set Fixture.Profiles defaults to when the
// fixture omits it. Keep alphabetized so iteration order is stable
// across the matrix. The loader copies this slice into each fixture
// rather than sharing it, so treat it as read-only.
var defaultProfiles = []string{ProfileDuckDB}
// Load reads the YAML file at path and turns it into a validated
// Fixture. Shape checks (required fields, exclusivity of the
// expectation, known profile names) happen during loading, so a
// non-nil result is ready to run. A read failure is wrapped as
// "read <path>: ...".
func Load(path string) (*Fixture, error) {
	// #nosec G304 -- path is fixture-discovery output controlled by
	// --fixtures flag in a CLI dev tool.
	raw, readErr := os.ReadFile(path)
	if readErr != nil {
		return nil, fmt.Errorf("read %s: %w", path, readErr)
	}
	return loadBytes(raw, path)
}
// loadBytes decodes in-memory YAML into a Fixture and validates it.
// It is the seam tests use to feed YAML without touching disk; Load
// is just a file-reading wrapper around it. path is recorded on the
// fixture and used to label errors.
//
// Decoding is strict: keys that do not map to a struct field are
// rejected. Decode failures are wrapped as "parse <path>: ..." and
// validation failures as "validate <path>: ...".
func loadBytes(data []byte, path string) (*Fixture, error) {
	decoder := yaml.NewDecoder(strings.NewReader(string(data)))
	decoder.KnownFields(true)

	fixture := &Fixture{}
	if err := decoder.Decode(fixture); err != nil {
		return nil, fmt.Errorf("parse %s: %w", path, err)
	}
	fixture.Path = path

	if err := fixture.normalize(); err != nil {
		return nil, fmt.Errorf("validate %s: %w", path, err)
	}
	return fixture, nil
}
// LoadDir walks a directory (recursively) and returns every loadable
// `.yaml` / `.yml` fixture, sorted by path. If `pathOrDir` points at
// a regular file it loads just that file.
//
// The first error encountered aborts the walk and is returned with a
// nil slice, so a single bad fixture stops the run with a clear
// pointer rather than being silently dropped.
//
// Sub-directories whose basename starts with `_` are skipped (see the
// comment inside the walk). The root itself is never skipped, whatever
// its name.
func LoadDir(pathOrDir string) ([]*Fixture, error) {
	info, err := os.Stat(pathOrDir)
	if err != nil {
		return nil, fmt.Errorf("stat %s: %w", pathOrDir, err)
	}
	if !info.IsDir() {
		f, err := Load(pathOrDir)
		if err != nil {
			return nil, err
		}
		return []*Fixture{f}, nil
	}
	var fixtures []*Fixture
	err = filepath.Walk(pathOrDir, func(p string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		// Skip directories whose basename starts with `_`. Used
		// for `conformance/fixtures/_route_drift_example/` and
		// future quarantine families that should NOT run in
		// `task conformance:run`. The leading-underscore
		// convention mirrors Bazel's `_*_test.cc` quarantine
		// pattern. Explicitly loading the fixture file with
		// `Load(...)` still works (the runner / matrix walker
		// can opt in by passing the file path directly).
		if fi.IsDir() {
			// filepath.Walk hands the root to the callback verbatim,
			// so comparing full paths identifies the root exactly.
			// Comparing basenames would also exempt a nested
			// directory that merely shares the root's name.
			if p != pathOrDir && strings.HasPrefix(filepath.Base(p), "_") {
				return filepath.SkipDir
			}
			return nil
		}
		ext := strings.ToLower(filepath.Ext(p))
		if ext != ".yaml" && ext != ".yml" {
			return nil
		}
		f, err := Load(p)
		if err != nil {
			return err
		}
		fixtures = append(fixtures, f)
		return nil
	})
	if err != nil {
		return nil, err
	}
	sort.Slice(fixtures, func(i, j int) bool {
		return fixtures[i].Path < fixtures[j].Path
	})
	return fixtures, nil
}
// normalize fills in defaults and validates a freshly decoded
// fixture, returning the first problem found:
//
//   - `name` and `query` must be non-blank;
//   - an empty ProjectID becomes "proj-conformance-" + sanitized name;
//   - an empty Profiles list becomes a copy of defaultProfiles;
//   - every profile must be one of KnownProfiles();
//   - the expectation and each setup step must validate.
func (f *Fixture) normalize() error {
	switch {
	case strings.TrimSpace(f.Name) == "":
		return errors.New("name is required")
	case strings.TrimSpace(f.Query) == "":
		return errors.New("query is required")
	}

	if f.ProjectID == "" {
		f.ProjectID = "proj-conformance-" + sanitizeID(f.Name)
	}
	if len(f.Profiles) == 0 {
		// Copy so fixtures never alias the shared default slice.
		f.Profiles = append([]string(nil), defaultProfiles...)
	}

	recognized := make(map[string]bool, len(KnownProfiles()))
	for _, profile := range KnownProfiles() {
		recognized[profile.Name] = true
	}
	for _, name := range f.Profiles {
		if recognized[name] {
			continue
		}
		return fmt.Errorf("unknown profile %q (known: %s)",
			name, strings.Join(profileNames(), ", "))
	}

	if err := f.validateExpectation(); err != nil {
		return err
	}
	for idx := range f.Setup {
		if err := f.Setup[idx].validate(); err != nil {
			return fmt.Errorf("setup[%d]: %w", idx, err)
		}
	}
	return nil
}
// validateExpectation defaults and checks the `expected` block:
//
//   - an empty match mode becomes MatchOrdered; unknown modes fail;
//   - `error` may not be combined with `rows` or `schema`;
//   - schema_only needs at least one of schema, rows, or error;
//   - ordered / unordered need rows or error;
//   - an error block must set code or message_contains;
//   - the route fields must pass validateRoute.
func (f *Fixture) validateExpectation() error {
	exp := &f.Expected
	if exp.Match == "" {
		exp.Match = MatchOrdered
	}
	if exp.Match != MatchOrdered && exp.Match != MatchUnordered && exp.Match != MatchSchemaOnly {
		return fmt.Errorf(
			"expected.match=%q is not one of ordered, unordered, schema_only",
			exp.Match)
	}

	rowsSet := exp.Rows != nil
	schemaSet := len(exp.Schema) > 0
	errSet := exp.Error != nil

	if errSet && (rowsSet || schemaSet) {
		return errors.New(
			"expected: error cannot be combined with rows or schema")
	}

	if exp.Match == MatchSchemaOnly {
		// schema_only fixtures must either declare an explicit
		// schema: block OR a rows: block (whose first row's keys
		// are used as the expected column-name set). Otherwise
		// there is nothing to assert on.
		if !(errSet || rowsSet || schemaSet) {
			return errors.New(
				"expected: match=schema_only requires schema or rows (column names)")
		}
	} else if !(rowsSet || errSet) {
		// ordered / unordered must set rows: or error:.
		return errors.New("expected: must set either rows or error")
	}

	if errSet && exp.Error.Code == 0 && exp.Error.MessageContains == "" {
		return errors.New("expected.error: must set at least one of code or message_contains")
	}
	return exp.validateRoute()
}
// validateRoute checks the route assertion fields so that a typo in
// `expected.route` or `expected.route_allowlist` fails the load
// rather than quietly permitting an unintended route. It rejects an
// unknown `route`, any unknown allowlist entry, and a non-empty
// allowlist while strict mode is in effect.
func (e *Expectation) validateRoute() error {
	if route := e.Route; route != "" && !isKnownRouteName(route) {
		return fmt.Errorf(
			"expected.route=%q is not a known disposition (one of %s)",
			route, strings.Join(KnownRouteNames(), ", "))
	}
	for idx, name := range e.RouteAllowlist {
		if isKnownRouteName(name) {
			continue
		}
		return fmt.Errorf(
			"expected.route_allowlist[%d]=%q is not a known disposition (one of %s)",
			idx, name, strings.Join(KnownRouteNames(), ", "))
	}
	if len(e.RouteAllowlist) > 0 && e.RouteStrictDefault() {
		return errors.New(
			"expected.route_allowlist must not be set when route_strict=true (use route_strict=false)")
	}
	return nil
}
// RouteStrictDefault resolves the optional `route_strict` field to a
// plain bool: true when the key was omitted (the safe default),
// otherwise the value the fixture spelled out. It is exported so the
// runner comparison and the matrix walker share one implementation
// of the pointer-vs-default rule.
func (e *Expectation) RouteStrictDefault() bool {
	return e.RouteStrict == nil || *e.RouteStrict
}
// validate checks that a setup step sets exactly one discriminator
// (dataset, table, rows, sql, row_access_policy, column_governance,
// connection_fixture) and that the chosen payload carries its
// required fields.
//
// Payload errors are reported before the "exactly one" error, in the
// order the discriminators are listed above. A whitespace-only `sql`
// counts as unset.
func (s SetupStep) validate() error {
	// One message for both the "none set" and "several set" cases, so
	// the list of discriminators cannot drift between the two.
	const exactlyOne = "setup step must set exactly one of dataset, table, rows, sql, row_access_policy, column_governance, connection_fixture"

	count := 0
	if s.Dataset != "" {
		count++
	}
	if s.Table != nil {
		count++
		if err := s.Table.validate(); err != nil {
			return err
		}
	}
	if s.Rows != nil {
		count++
		if s.Rows.Dataset == "" {
			return errors.New("rows.dataset is required")
		}
		if s.Rows.Table == "" {
			return errors.New("rows.table is required")
		}
		if len(s.Rows.Rows) == 0 {
			return errors.New("rows.rows must list at least one row")
		}
	}
	if strings.TrimSpace(s.SQL) != "" {
		count++
	}
	if s.RowAccessPolicy != nil {
		count++
		if s.RowAccessPolicy.Dataset == "" || s.RowAccessPolicy.Table == "" ||
			s.RowAccessPolicy.PolicyID == "" || s.RowAccessPolicy.FilterPredicate == "" {
			return errors.New("row_access_policy requires dataset, table, policy_id, filter_predicate")
		}
	}
	if s.ColumnGovernance != nil {
		count++
		if s.ColumnGovernance.Dataset == "" || s.ColumnGovernance.Table == "" ||
			s.ColumnGovernance.Column == "" || s.ColumnGovernance.MaskKind == "" {
			return errors.New("column_governance requires dataset, table, column, mask_kind")
		}
	}
	if s.ConnectionFixture != nil {
		count++
		if s.ConnectionFixture.ConnectionID == "" || s.ConnectionFixture.SourceDir == "" {
			return errors.New("connection_fixture requires connection_id and source_dir")
		}
	}
	if count != 1 {
		return errors.New(exactlyOne)
	}
	return nil
}
// sanitizeID lowercases s and maps every rune outside [a-z0-9] to
// `-` (a literal `-` therefore stays `-`). The output has one
// character per input rune. It is used to derive default project IDs
// that fit BigQuery's project-ID grammar (the emulator does not
// strictly enforce it today, but compatible defaults let fixtures
// port to a real backend cleanly).
func sanitizeID(s string) string {
	keepOrDash := func(r rune) rune {
		if ('a' <= r && r <= 'z') || ('0' <= r && r <= '9') {
			return r
		}
		return '-'
	}
	return strings.Map(keepOrDash, strings.ToLower(s))
}
package runner
import (
"context"
"encoding/json"
"fmt"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// CompareRows diffs actual query rows against an Expectation using the
// same typed-cell engine as the YAML fixture lane. Returns an empty
// string on match. It is an exported pass-through to the
// package-private rowDiff.
func CompareRows(exp Expectation, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
return rowDiff(exp, schema, actualRows)
}
// QueryViaGateway posts a GoogleSQL statement to the gateway's
// jobs.query endpoint and returns the HTTP status plus raw body.
// The request URL is baseURL + "/queries" and no default dataset is
// sent. It is an exported pass-through to postQuery.
func QueryViaGateway(ctx context.Context, baseURL, sql string) (int, []byte, error) {
return postQuery(ctx, baseURL, sql)
}
// SetupSQLViaGateway sends one statement through jobs.query to seed
// the catalog (CREATE TABLE and the like). A transport error is
// returned as-is; any response status outside 2xx becomes an error
// that carries the status code and a snippet of the response body.
func SetupSQLViaGateway(ctx context.Context, baseURL, sql string) error {
	code, respBody, err := postQuery(ctx, baseURL, sql)
	switch {
	case err != nil:
		return err
	case code < 200 || code >= 300:
		return fmt.Errorf("setup sql -> %d: %s", code, snippet(respBody))
	}
	return nil
}
// DoRequest posts JSON to a gateway URL. Exported for sub-lanes.
// It is a pass-through to the package-private doRequest and returns
// that function's results unchanged.
func DoRequest(ctx context.Context, url string, body []byte) (int, []byte, error) {
return doRequest(ctx, url, body)
}
// postQuery posts sql to the gateway's query endpoint without a
// request-level default dataset.
func postQuery(ctx context.Context, baseURL, sql string) (int, []byte, error) {
return postQueryWithDefaultDataset(ctx, baseURL, sql, "")
}
// postQueryWithDefaultDataset builds a jobs.query body for sql (with
// the optional defaultDataset and no query parameters) and posts it
// to baseURL + "/queries". If the body cannot be marshaled it
// returns (0, nil, err) without sending anything.
func postQueryWithDefaultDataset(ctx context.Context, baseURL, sql, defaultDataset string) (int, []byte, error) {
	payload, marshalErr := MarshalJobsQueryBody(sql, defaultDataset, nil)
	if marshalErr != nil {
		return 0, nil, marshalErr
	}
	endpoint := baseURL + "/queries"
	return doRequest(ctx, endpoint, payload)
}
// MarshalJobsQueryBody builds the JSON body for a jobs.query request.
// Exported for sub-lanes.
//
// The body always carries `query` and `useLegacySql: false`. A
// non-empty defaultDataset adds `defaultDataset.datasetId`; a
// non-empty params slice adds `parameterMode: "NAMED"` together with
// `queryParameters`. Encoding failures are wrapped as
// "marshal query: ...".
func MarshalJobsQueryBody(sql, defaultDataset string, params []bqtypes.QueryParameter) ([]byte, error) {
	payload := make(map[string]any, 5)
	payload["query"] = sql
	payload["useLegacySql"] = false
	if defaultDataset != "" {
		payload["defaultDataset"] = map[string]string{"datasetId": defaultDataset}
	}
	if len(params) != 0 {
		payload["parameterMode"] = "NAMED"
		payload["queryParameters"] = params
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("marshal query: %w", err)
	}
	return encoded, nil
}
// marshalJobsQueryBody is the package-private, parameterless form of
// MarshalJobsQueryBody: it builds the same body with no query
// parameters.
func marshalJobsQueryBody(sql, defaultDataset string) ([]byte, error) {
return MarshalJobsQueryBody(sql, defaultDataset, nil)
}
package runner
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"net"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/vantaboard/bigquery-emulator/gateway"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
)
// httpMethodGet is the HTTP GET method name as a package-level
// constant.
const httpMethodGet = "GET"
// spawnState captures everything needed to restart a spawned emulator with the
// same --data_dir and profile flags.
type spawnState struct {
// engineBinary is the engine executable that was launched.
engineBinary string
// engineArgs are the command-line arguments it was launched with.
engineArgs []string
// engineAddr is the address of the spawned engine.
// NOTE(review): presumably the gRPC listen address the gateway
// dials — confirm in the spawn path.
engineAddr string
// profile is the runtime profile the engine was started under.
profile Profile
// harnessOpts are the harness options used for the original spawn.
harnessOpts HarnessOptions
}
// engineReadyTimeout (30 seconds) matches the value the production
// gateway uses; keeps the cold-start budget consistent across CI
// lanes.
const engineReadyTimeout = 30 * time.Second
// EmulatorEnv is one running emulator the runner can drive: an
// HTTP gateway (in-process) sitting in front of either a subprocess
// engine the runner spawned or an already-running engine the runner
// dialed via `--connect`.
//
// The struct intentionally mirrors `gateway/e2e/catalog_test.go::emulatorEnv`
// so a future plan could fold the two into a shared package. Today
// the e2e harness is in `package e2e` with the `integration` build
// tag, so the runner needs its own copy.
type EmulatorEnv struct {
// BaseURL is the gateway's HTTP root. Callers concatenate
// `/bigquery/v2/projects/...` onto it.
BaseURL string
// httpServer is the in-process test server hosting the gateway;
// Close shuts it down.
httpServer *httptest.Server
// client is the engine client; Close closes it and reports its
// error.
client *engine.Client
// cmd is set when the runner spawned `emulator_main` itself.
// nil when --connect is in use; Close in that mode only tears
// down the HTTP server + gRPC channel.
cmd *exec.Cmd
// dataDir is the temporary `--data_dir` the harness allocated
// for this emulator. The teardown path removes it.
dataDir string
// spawn is set for subprocess engines so RestartEngine can relaunch
// against the same data_dir mid-session.
spawn *spawnState
}
// DataDir reports the scratch --data_dir allocated for a spawned emulator.
// It yields the empty string for a nil receiver and for environments that
// were created via --connect (which never allocate one).
func (e *EmulatorEnv) DataDir() string {
	if e != nil {
		return e.dataDir
	}
	return ""
}
// Close tears the environment down in a fixed order: the HTTP gateway, the
// gRPC client, the engine subprocess (when the runner spawned one), and
// finally the scratch data_dir.
//
// The gateway, client, and subprocess handles are cleared as soon as they
// are released, so a repeated call neither re-closes the client nor
// re-signals an already reaped process; removing an already removed
// data_dir is a no-op. A nil receiver returns nil.
//
// Every teardown failure is reported: the client-close error and the
// data_dir removal error are combined with errors.Join rather than the
// second one being dropped.
func (e *EmulatorEnv) Close() error {
	if e == nil {
		return nil
	}
	var errs []error
	if e.httpServer != nil {
		e.httpServer.Close()
		e.httpServer = nil
	}
	if e.client != nil {
		if err := e.client.Close(); err != nil {
			errs = append(errs, err)
		}
		e.client = nil
	}
	if e.cmd != nil && e.cmd.Process != nil {
		// Capture the process handle before clearing e.cmd so the
		// waiter goroutine does not race with the field reset.
		proc := e.cmd.Process
		e.cmd = nil
		// Best-effort graceful shutdown: SIGINT first, then KILL
		// after a short budget. The C++ engine registers a SIGINT
		// handler that flushes the storage layer; KILL is the
		// belt-and-suspenders path for a wedged subprocess.
		_ = proc.Signal(os.Interrupt)
		done := make(chan struct{})
		go func() {
			_, _ = proc.Wait()
			close(done)
		}()
		select {
		case <-done:
		case <-time.After(5 * time.Second):
			_ = proc.Kill()
			<-done
		}
	}
	if e.dataDir != "" {
		// dataDir itself is kept so DataDir() keeps answering after Close.
		if err := os.RemoveAll(e.dataDir); err != nil {
			errs = append(errs, err)
		}
	}
	return errors.Join(errs...)
}
// HarnessOptions configures how the runner spins up the emulator for
// one fixture x profile execution. Callers either set EngineBinary
// (the runner spawns its own emulator subprocess and tears it down
// after the fixture) or ConnectAddress (the runner dials an
// already-running engine on `host:port`).
type HarnessOptions struct {
// EngineBinary is the path to `emulator_main`. Defaults to
// `./bin/emulator_main` when both it and ConnectAddress are empty.
// Mutually exclusive with ConnectAddress.
EngineBinary string
// ConnectAddress is `host:port` for an already-running engine.
// Empty means the harness spawns its own subprocess. Mutually
// exclusive with EngineBinary.
ConnectAddress string
// EngineStdout / EngineStderr receive the engine subprocess's
// streams. nil discards them; tests typically pass `os.Stderr`
// to keep crash output visible.
EngineStdout io.Writer
EngineStderr io.Writer
// DataDirRoot is the parent directory under which the harness
// allocates per-emulator `--data_dir` paths for the DuckDB
// profile. Empty defers to `os.TempDir()`.
DataDirRoot string
}
// validate checks that EngineBinary and ConnectAddress are not both set and,
// when neither is set, fills EngineBinary with the default
// `./bin/emulator_main` path. It mutates the receiver in the defaulting case.
func (o *HarnessOptions) validate() error {
	hasBinary := o.EngineBinary != ""
	hasConnect := o.ConnectAddress != ""
	switch {
	case hasBinary && hasConnect:
		return errors.New("HarnessOptions: --engine-binary and --connect are mutually exclusive")
	case !hasBinary && !hasConnect:
		o.EngineBinary = filepath.Join(".", "bin", "emulator_main")
	}
	return nil
}
// StartEmulator validates opts and then brings up an EmulatorEnv for the
// given profile. The caller owns the returned env (including any
// subprocess behind it) and must Close it; the harness registers no global
// cleanup of its own.
//
// With ConnectAddress set, the harness only dials the existing engine: it
// does not push `--engine` / `--storage` over the wire, so the connected
// emulator must already be configured for the requested profile (CI wires
// this). Otherwise a fresh subprocess is spawned for the profile.
func StartEmulator(ctx context.Context, opts HarnessOptions, p Profile) (*EmulatorEnv, error) {
	err := opts.validate()
	switch {
	case err != nil:
		return nil, err
	case opts.ConnectAddress == "":
		return startSpawned(ctx, opts, p)
	default:
		return startConnected(ctx, opts)
	}
}
// startConnected dials an already-running engine on the configured
// address and wires an in-process HTTP gateway in front of it. The
// returned env's cmd field is nil; Close only releases the channel
// and the HTTP server.
//
// The readiness wait is capped at engineReadyTimeout, the same budget the
// spawned path uses, so a wrong or unreachable address fails with an error
// instead of blocking for as long as ctx stays alive. A shorter deadline
// already present on ctx still wins.
func startConnected(ctx context.Context, opts HarnessOptions) (*EmulatorEnv, error) {
	client, err := engine.Dial(opts.ConnectAddress)
	if err != nil {
		return nil, fmt.Errorf("dial connect=%s: %w", opts.ConnectAddress, err)
	}
	readyCtx, cancel := context.WithTimeout(ctx, engineReadyTimeout)
	defer cancel()
	if err := client.WaitForReady(readyCtx); err != nil {
		// The dial succeeded, so the channel must be released on failure.
		_ = client.Close()
		return nil, fmt.Errorf("connected engine not ready at %s: %w",
			opts.ConnectAddress, err)
	}
	handler := gateway.NewServer(gateway.Options{}, handlers.BuildDependencies(client), client)
	srv := httptest.NewServer(handler)
	return &EmulatorEnv{
		BaseURL:    srv.URL,
		httpServer: srv,
		client:     client,
	}, nil
}
// startSpawned boots a brand-new `emulator_main` subprocess using the
// profile's flags, waits until the engine reports ready, and fronts it with
// an in-process HTTP gateway. The returned env owns the subprocess and its
// scratch data_dir.
func startSpawned(ctx context.Context, opts HarnessOptions, p Profile) (*EmulatorEnv, error) {
	if _, statErr := os.Stat(opts.EngineBinary); statErr != nil {
		return nil, fmt.Errorf("engine binary not found at %s: %w "+
			"(build with `task emulator:build-engine:bazel` or pass "+
			"--connect HOST:PORT)", opts.EngineBinary, statErr)
	}

	argv, scratchDir, hostPort, err := prepareSpawnArgs(opts, p)
	if err != nil {
		return nil, err
	}

	proc, err := launchEngine(opts, argv)
	if err != nil {
		// Nothing is running yet; only the scratch directory needs undoing.
		if scratchDir != "" {
			_ = os.RemoveAll(scratchDir)
		}
		return nil, fmt.Errorf("spawn %s: %w", opts.EngineBinary, err)
	}

	// From here on a live process exists, so failures must reap it. The
	// teardown closure observes these two variables through pointers, which
	// lets it be armed before either one has been assigned.
	var (
		engineClient *engine.Client
		gatewaySrv   *httptest.Server
	)
	teardown := newSpawnCleanup(proc, scratchDir, &engineClient, &gatewaySrv)

	if engineClient, err = waitForReady(ctx, hostPort); err != nil {
		teardown()
		return nil, err
	}

	deps := handlers.BuildDependenciesWith(engineClient, handlers.DepsOptions{DataDir: scratchDir})
	gatewaySrv = httptest.NewServer(gateway.NewServer(gateway.Options{}, deps, engineClient))

	// Success: ownership of every resource moves to the EmulatorEnv.
	restart := &spawnState{
		engineBinary: opts.EngineBinary,
		engineArgs:   append([]string(nil), argv...),
		engineAddr:   hostPort,
		profile:      p,
		harnessOpts:  opts,
	}
	return &EmulatorEnv{
		BaseURL:    gatewaySrv.URL,
		httpServer: gatewaySrv,
		client:     engineClient,
		cmd:        proc,
		dataDir:    scratchDir,
		spawn:      restart,
	}, nil
}
// prepareSpawnArgs picks a loopback port, creates a scratch data_dir, and
// builds the argv for a fresh `emulator_main`. It returns the argv, the
// data_dir path, and the host:port the engine is told to bind. Errors are
// already wrapped so the caller can return them unchanged.
func prepareSpawnArgs(opts HarnessOptions, p Profile) (args []string, dataDir, addr string, err error) {
	port, portErr := freePort()
	if portErr != nil {
		return nil, "", "", fmt.Errorf("allocate engine port: %w", portErr)
	}
	addr = net.JoinHostPort("127.0.0.1", strconv.Itoa(port))

	args = []string{"--host_port", addr}
	args = append(args, p.EmulatorMainArgs()...)

	// DuckDB storage always needs a persistent --data_dir; a dedicated temp
	// directory per spawn keeps concurrent profile runs from colliding on
	// the same catalog.
	parent := opts.DataDirRoot
	if parent == "" {
		parent = os.TempDir()
	}
	if dataDir, err = os.MkdirTemp(parent, "bq-conformance-"); err != nil {
		return nil, "", "", fmt.Errorf("create data_dir: %w", err)
	}

	args = append(args, "--data_dir", dataDir)
	return args, dataDir, addr, nil
}
// launchEngine starts the configured engine binary with the supplied argv,
// routing its stdout/stderr to the writers carried in opts. It returns the
// started command, or the Start error unchanged.
func launchEngine(opts HarnessOptions, args []string) (*exec.Cmd, error) {
	// #nosec G204 -- emulator binary path is operator-supplied via
	// --engine-binary; runner is a CLI dev tool.
	engineCmd := exec.Command(opts.EngineBinary, args...)
	engineCmd.Stdout, engineCmd.Stderr = opts.EngineStdout, opts.EngineStderr
	if err := engineCmd.Start(); err != nil {
		return nil, err
	}
	return engineCmd, nil
}
// waitForReady dials the engine at addr and blocks until the client reports
// ready, giving up after engineReadyTimeout (or earlier if ctx ends first).
// On success the caller owns the returned *engine.Client; on failure the
// client is closed before the wrapped error is returned.
func waitForReady(ctx context.Context, addr string) (*engine.Client, error) {
	conn, dialErr := engine.Dial(addr)
	if dialErr != nil {
		return nil, fmt.Errorf("dial %s: %w", addr, dialErr)
	}

	deadlineCtx, release := context.WithTimeout(ctx, engineReadyTimeout)
	defer release()

	readyErr := conn.WaitForReady(deadlineCtx)
	if readyErr == nil {
		return conn, nil
	}
	_ = conn.Close()
	return nil, fmt.Errorf("emulator at %s not ready: %w", addr, readyErr)
}
// newSpawnCleanup builds the failure-path teardown for a spawned engine.
// The returned closure runs its work at most once (sync.Once): it closes the
// gateway test server and engine client if they have been assigned, stops
// the subprocess (SIGINT, then KILL once a 5s budget elapses), and removes
// the scratch data_dir. The server and client are passed as pointers to
// pointers so the closure can be created before either of them exists and
// still see the values assigned later.
func newSpawnCleanup(cmd *exec.Cmd, dataDir string, clientPtr **engine.Client, srvPtr **httptest.Server) func() {
	var once sync.Once

	teardown := func() {
		if *srvPtr != nil {
			(*srvPtr).Close()
		}
		if *clientPtr != nil {
			_ = (*clientPtr).Close()
		}

		_ = cmd.Process.Signal(os.Interrupt)
		exited := make(chan struct{})
		go func() {
			defer close(exited)
			_, _ = cmd.Process.Wait()
		}()
		select {
		case <-exited:
		case <-time.After(5 * time.Second):
			_ = cmd.Process.Kill()
			<-exited
		}

		if dataDir != "" {
			_ = os.RemoveAll(dataDir)
		}
	}

	return func() { once.Do(teardown) }
}
// freePort asks the kernel for an unused loopback TCP port by listening on
// 127.0.0.1:0, recording the port that was assigned, and closing the
// listener again. The port is only free at the moment of the call: another
// process may claim it before the caller binds it.
func freePort() (int, error) {
	probe, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		return 0, err
	}
	assigned := probe.Addr().(*net.TCPAddr)
	if closeErr := probe.Close(); closeErr != nil {
		return 0, closeErr
	}
	return assigned.Port, nil
}
// DoHTTPRequest performs one HTTP call against the gateway and returns the
// response status, the full response body, and any error.
//
// method is trimmed and upper-cased before use. A non-empty body is sent
// with Content-Type application/json; an empty or nil body sends no payload
// and no Content-Type. The request goes through http.DefaultClient and is
// bound to ctx. If reading the response body fails, the status code is
// still returned alongside the error.
func DoHTTPRequest(ctx context.Context, method, url string, body []byte) (int, []byte, error) {
	method = strings.ToUpper(strings.TrimSpace(method))

	// Keep the reader as a nil interface when there is nothing to send.
	hasBody := len(body) > 0
	var payload io.Reader
	if hasBody {
		payload = bytes.NewReader(body)
	}

	req, err := http.NewRequestWithContext(ctx, method, url, payload)
	if err != nil {
		return 0, nil, fmt.Errorf("build request %s %s: %w", method, url, err)
	}
	if hasBody {
		req.Header.Set("Content-Type", "application/json")
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return 0, nil, fmt.Errorf("http %s %s: %w", method, url, err)
	}
	defer func() { _ = resp.Body.Close() }()

	data, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return resp.StatusCode, nil, fmt.Errorf("read body from %s %s: %w", method, url, readErr)
	}
	return resp.StatusCode, data, nil
}
// RestartEngine gracefully stops the spawned subprocess and relaunches it
// with the same argv (including --data_dir), then rebuilds the engine client
// and the HTTP gateway. BaseURL is replaced with the new gateway's URL.
//
// It returns an error when the receiver is nil or the environment was not
// spawned by the runner (--connect mode), when the old client fails to
// close, when the relaunch fails, or when the relaunched engine does not
// become ready.
//
// The rebuilt gateway is given the same DataDir option that startSpawned
// passes, so handler dependencies see the same data_dir after a restart as
// before it.
func (e *EmulatorEnv) RestartEngine(ctx context.Context) error {
	if e == nil || e.spawn == nil {
		return errors.New("restart requires a spawned emulator (not --connect mode)")
	}
	if e.httpServer != nil {
		e.httpServer.Close()
		e.httpServer = nil
	}
	if e.client != nil {
		if err := e.client.Close(); err != nil {
			return fmt.Errorf("close engine client before restart: %w", err)
		}
		e.client = nil
	}
	if e.cmd != nil && e.cmd.Process != nil {
		// SIGINT first so the engine can shut down cleanly; KILL only
		// after the 5s budget runs out.
		_ = e.cmd.Process.Signal(os.Interrupt)
		done := make(chan struct{})
		go func() {
			_, _ = e.cmd.Process.Wait()
			close(done)
		}()
		select {
		case <-done:
		case <-time.After(5 * time.Second):
			_ = e.cmd.Process.Kill()
			<-done
		}
	}
	cmd, err := launchEngine(e.spawn.harnessOpts, e.spawn.engineArgs)
	if err != nil {
		return fmt.Errorf("restart spawn: %w", err)
	}
	// Record the new process immediately so Close can reap it even when
	// the readiness wait below fails.
	e.cmd = cmd
	client, err := waitForReady(ctx, e.spawn.engineAddr)
	if err != nil {
		_ = cmd.Process.Kill()
		return err
	}
	e.client = client
	e.httpServer = httptest.NewServer(
		gateway.NewServer(
			gateway.Options{},
			handlers.BuildDependenciesWith(client, handlers.DepsOptions{DataDir: e.dataDir}),
			client,
		),
	)
	e.BaseURL = e.httpServer.URL
	return nil
}
// doRequest is the runner's slimmed-down HTTP helper. Every caller
// POSTs a JSON body, so the method is hard-coded to POST and the body
// is required (callers already gate on whether they have something to
// send before calling). Errors are wrapped with the URL so the
// runner-internal error path is easy to debug.
//
// It returns the HTTP status code, the raw response body, and any error
// reported by DoHTTPRequest.
func doRequest(ctx context.Context, url string, body []byte) (int, []byte, error) {
return DoHTTPRequest(ctx, http.MethodPost, url, body)
}
// doPatchRequest sends a JSON body using the HTTP PATCH method. Used by
// setup steps that mutate table metadata (e.g. column governance via
// tables.patch). It returns the HTTP status code, the raw response body,
// and any error reported by DoHTTPRequest.
func doPatchRequest(ctx context.Context, url string, body []byte) (int, []byte, error) {
return DoHTTPRequest(ctx, http.MethodPatch, url, body)
}
package runner
import (
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
)
// packagePreflightScript is the Python program run by missingPythonPackages.
// It reads a JSON array of requirement strings from stdin, strips any
// version specifier (==, >=, <=, !=, ~=, <, >) to obtain a module name,
// probes each name with importlib.util.find_spec, and writes
// {"missing": [...]} to stdout listing the requirements whose module could
// not be found.
//
// The body must keep valid Python block indentation: the statements under
// `def`, `for`, and `if` are nested with four spaces per level.
const packagePreflightScript = `
import importlib.util
import json
import sys

def _module_name(pkg):
    for sep in ("==", ">=", "<=", "!=", "~=", "<", ">"):
        if sep in pkg:
            return pkg.split(sep, 1)[0].strip()
    return pkg.strip()

packages = json.load(sys.stdin)
missing = []
for pkg in packages:
    mod = _module_name(pkg)
    if importlib.util.find_spec(mod) is None:
        missing.append(pkg)
json.dump({"missing": missing}, sys.stdout)
`
// optionalDependencySkipReason explains why a fixture should be skipped on
// account of its optional Python dependencies. It returns "" when deps is
// empty or every dependency is importable; otherwise it returns either the
// list of missing packages or a description of why the check itself could
// not be carried out (no interpreter, preflight failure).
func optionalDependencySkipReason(deps []string) string {
	const checkFailed = "optional dependency check: "

	if len(deps) == 0 {
		return ""
	}
	interpreter, err := resolveConformancePython()
	if err != nil {
		return checkFailed + err.Error()
	}
	absent, err := missingPythonPackages(interpreter, deps)
	switch {
	case err != nil:
		return checkFailed + err.Error()
	case len(absent) == 0:
		return ""
	}
	return "optional dependencies not available: " + strings.Join(absent, ", ")
}
// resolveConformancePython locates the Python interpreter used for the
// optional-dependency preflight. Candidates are tried in this order:
//
//  1. $BIGQUERY_EMULATOR_PYTHON, which must exist on disk or an error is
//     returned without falling through;
//  2. $BIGQUERY_EMULATOR_DATA_DIR/python-udf-env/bin/python3, used only
//     when it exists and has at least one execute bit set;
//  3. `python3` resolved through PATH.
//
// An error is returned when none of the candidates is usable.
func resolveConformancePython() (string, error) {
	if override := os.Getenv("BIGQUERY_EMULATOR_PYTHON"); override != "" {
		_, statErr := os.Stat(override)
		if statErr != nil {
			return "", fmt.Errorf("BIGQUERY_EMULATOR_PYTHON %q: %w", override, statErr)
		}
		return override, nil
	}

	if root := os.Getenv("BIGQUERY_EMULATOR_DATA_DIR"); root != "" {
		candidate := filepath.Join(root, "python-udf-env", "bin", "python3")
		info, statErr := os.Stat(candidate)
		if statErr == nil && info.Mode()&0o111 != 0 {
			return candidate, nil
		}
	}

	onPath, lookErr := exec.LookPath("python3")
	if lookErr != nil {
		return "", errors.New("python3 not found on PATH")
	}
	return onPath, nil
}
// missingPythonPackages runs packagePreflightScript under the given
// interpreter, feeding it packages as a JSON array on stdin, and returns the
// entries the script reported as missing.
//
// It returns an error when the package list cannot be encoded, when the
// interpreter fails to run or exits unsuccessfully, or when its stdout is
// not the expected JSON object. For an unsuccessful exit, the stderr text
// captured by Output is appended to the error so the interpreter's own
// diagnostic is not lost.
func missingPythonPackages(python string, packages []string) ([]string, error) {
	payload, err := json.Marshal(packages)
	if err != nil {
		return nil, err
	}
	cmd := exec.Command(python, "-c", packagePreflightScript)
	cmd.Stdin = strings.NewReader(string(payload))
	out, err := cmd.Output()
	if err != nil {
		// Output fills ExitError.Stderr because cmd.Stderr was left nil.
		var exitErr *exec.ExitError
		if errors.As(err, &exitErr) {
			if detail := strings.TrimSpace(string(exitErr.Stderr)); detail != "" {
				return nil, fmt.Errorf("python preflight: %w: %s", err, detail)
			}
		}
		return nil, fmt.Errorf("python preflight: %w", err)
	}
	var parsed struct {
		Missing []string `json:"missing"`
	}
	if err := json.Unmarshal(out, &parsed); err != nil {
		return nil, fmt.Errorf("decode preflight response: %w", err)
	}
	return parsed.Missing, nil
}
package runner
import "sort"
// ProfileDuckDB is the canonical conformance-profile identifier for
// the DuckDB-backed runtime. Hoisted to a package const so fixtures,
// the CLI, and the runner all reference the same spelling. It is the
// Name of the single entry in the package's profile table.
const ProfileDuckDB = "duckdb"
// Profile is one named runtime configuration the runner can drive.
// Today there is only one (`duckdb`) since the emulator ships a
// single local-execution coordinator with DuckDB storage; the type
// is kept around so the fixture / CLI surface stays generic if a
// second profile lands later.
//
// `EmulatorMainArgs` is the flag list the harness passes when
// spawning `emulator_main`; the runner does not include
// `--host_port` here because the harness picks a free port per
// spawn.
//
// Profile is a plain value type: the lookup helpers in this package
// hand out copies rather than pointers into the profile table.
type Profile struct {
// Name is the user-facing profile identifier. Fixtures reference
// this in their `profiles:` field, and the runner echoes it in
// every result line.
Name string
}
// EmulatorMainArgs returns the `emulator_main` flag list for this
// profile. The DuckDB profile has no engine / storage selector
// flags (those were removed when the reference-impl + in-memory
// storage backends were deleted), so this returns nil today, which
// callers can append from like an empty slice.
// Keeping the helper means fixtures and the runner do not have to
// branch on profile name.
func (p Profile) EmulatorMainArgs() []string {
return nil
}
// profiles is the package's table of known profiles. KnownProfiles,
// LookupProfile, and profileNames all read from it.
var profiles = []Profile{
{
Name: ProfileDuckDB,
},
}
// KnownProfiles hands back a fresh copy of the profile table, ordered
// alphabetically by Name so the matrix iterates in a stable order. Callers
// may modify the returned slice without affecting the package table.
func KnownProfiles() []Profile {
	snapshot := make([]Profile, len(profiles))
	copy(snapshot, profiles)
	byName := func(a, b int) bool {
		return snapshot[a].Name < snapshot[b].Name
	}
	sort.Slice(snapshot, byName)
	return snapshot
}
// LookupProfile finds the profile whose Name equals name. The second result
// is false, and the Profile is the zero value, when no such profile exists.
func LookupProfile(name string) (Profile, bool) {
	for i := range profiles {
		if profiles[i].Name == name {
			return profiles[i], true
		}
	}
	return Profile{}, false
}
// profileNames returns the names of all known profiles in ascending order.
func profileNames() []string {
	names := make([]string, len(profiles))
	for i := range profiles {
		names[i] = profiles[i].Name
	}
	sort.Strings(names)
	return names
}
package runner
import (
"encoding/json"
"fmt"
"io"
"strings"
)
// writeTextResult renders a single fixture x profile result as text: one
// bracketed status line, followed by the message (if any) and each line of
// the diff (if any). Output is kept compact so a sweep across hundreds of
// fixtures remains scannable. Write errors are deliberately ignored.
func writeTextResult(w io.Writer, r Result) {
	var label string
	switch r.Status {
	case StatusPass:
		label = "PASS"
	case StatusFail:
		label = "FAIL"
	case StatusSkip:
		label = "SKIP"
	default:
		// Unknown statuses are shown verbatim.
		label = string(r.Status)
	}

	_, _ = fmt.Fprintf(w, "[%s] %s (profile=%s, %dms)\n",
		label, r.Fixture, r.Profile, r.DurationMs)
	if r.Message != "" {
		_, _ = fmt.Fprintf(w, " %s\n", r.Message)
	}
	if r.Diff == "" {
		return
	}
	// Trailing newlines are dropped first so no empty line is emitted at
	// the end of the diff.
	trimmed := strings.TrimRight(r.Diff, "\n")
	for diffLine := range strings.SplitSeq(trimmed, "\n") {
		_, _ = fmt.Fprintf(w, " %s\n", diffLine)
	}
}
// writeTextSummary writes the closing rollup for a run: a `---` separator
// followed by one line with the total / passed / failed / skipped counts.
// The wording follows `go test`'s `--- PASS` style so readers need no new
// vocabulary. Write errors are deliberately ignored.
func writeTextSummary(w io.Writer, report *Report) {
	_, _ = fmt.Fprintf(w, "---\n")
	totals := report.Summary
	_, _ = fmt.Fprintf(w, "conformance: total=%d passed=%d failed=%d skipped=%d\n",
		totals.Total, totals.Passed, totals.Failed, totals.Skipped)
}
// writeJSONReport encodes the report as indented JSON onto w and returns the
// encoder's error, if any. The schema is documented in
// `conformance/README.md` (the "JSON output shape" section). Plan-41 CI
// pivots on `schema_version`.
func writeJSONReport(w io.Writer, report *Report) error {
	encoder := json.NewEncoder(w)
	encoder.SetIndent("", " ")
	if err := encoder.Encode(report); err != nil {
		return err
	}
	return nil
}
package runner
import "slices"
// Go-side mirror of the C++ canonical `Disposition` vocabulary
// `backend/engine/disposition.cc::DispositionToString` produces.
// The two sides MUST agree letter-for-letter: a fixture writer
// pinning `expected.route: duckdb_native` only PASSes when the
// engine trails `emulator_route=duckdb_native`. Keep this list
// sorted by `node_dispositions.yaml`'s priority order (low ->
// high) so a reviewer can eyeball the relative weight if it ever
// comes up.
//
// Plan ownership: `docs/ENGINE_POLICY.md` governs both this file and
// the C++ source of truth. A new disposition value lands on BOTH
// sides at once.
//
// We do NOT generate this from the C++ header at build time: the
// 7-entry list churns rarely (each new entry is a multi-plan
// rollout) and the parity check in `tools/check_disposition_parity`
// already catches a C++/YAML/Go drift before it ships.
// Canonical lowercase-snake disposition names. Must mirror
// `Disposition::k*` in `backend/engine/disposition.h`.
// Each constant is an untyped string; knownRoutes lists all seven of them.
const (
// RouteDuckDBNative lowers to DuckDB SQL whose semantics
// already match BigQuery exactly.
RouteDuckDBNative = "duckdb_native"
// RouteDuckDBRewrite lowers to DuckDB SQL via a deliberate
// structural rewrite (struct/array shape rewrites, JSON
// operator mapping, ...). Same executor as duckdb_native.
RouteDuckDBRewrite = "duckdb_rewrite"
// RouteDuckDBUDF lowers to DuckDB SQL that calls one of the
// polyfill UDFs/macros registered at engine startup.
RouteDuckDBUDF = "duckdb_udf"
// RouteSemanticExecutor runs on the local row/value semantic
// executor instead of DuckDB SQL evaluation.
RouteSemanticExecutor = "semantic_executor"
// RouteControlOp is the DDL / metadata / catalog op route
// (CREATE TABLE / DROP TABLE / ALTER / pipe-DDL).
RouteControlOp = "control_op"
// RouteLocalStub is the deterministic BigQuery-shaped stub
// route for specialized features (KEYS.NEW_KEYSET, CREATE
// MODEL, ...). See `docs/ENGINE_POLICY.md`.
RouteLocalStub = "local_stub"
// RouteUnsupported surfaces a BigQuery-shaped `UNIMPLEMENTED`.
// See `docs/ENGINE_POLICY.md` for
// the unsupported families list.
RouteUnsupported = "unsupported"
)
// knownRoutes pins the closed set in priority order (matches the
// C++ enum declaration in `disposition.h`). Iterating this slice
// gives a stable, reviewable ordering in user-facing error
// messages. KnownRouteNames returns a copy of it and isKnownRouteName
// tests membership in it.
var knownRoutes = []string{
RouteDuckDBNative,
RouteDuckDBRewrite,
RouteDuckDBUDF,
RouteSemanticExecutor,
RouteControlOp,
RouteLocalStub,
RouteUnsupported,
}
// KnownRouteNames yields the canonical disposition names in priority order.
// The result is a fresh slice, so callers may modify it freely. Validation
// messages and the matrix walker both use it so they share one source of
// truth.
func KnownRouteNames() []string {
	names := make([]string, 0, len(knownRoutes))
	return append(names, knownRoutes...)
}
// isKnownRouteName reports whether `s` is one of the canonical
// disposition names. The comparison is an exact string match against the
// entries of knownRoutes.
func isKnownRouteName(s string) bool {
return slices.Contains(knownRoutes, s)
}
package runner
import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"slices"
"sort"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// Status is the per-fixture verdict the runner emits.
type Status string
// The verdicts a Result can carry. The string values appear verbatim in the
// JSON report's `status` field.
const (
// StatusPass marks a fixture whose response matched its expectation.
StatusPass Status = "PASS"
// StatusFail marks a mismatch or a runner-internal error for the fixture.
StatusFail Status = "FAIL"
// StatusSkip marks a fixture that was not executed.
StatusSkip Status = "SKIP"
)
// JSONSchemaVersion is the on-the-wire `schema_version` the diff CI
// pivots on. Bumped only on a breaking output-shape change. Every Report
// built by iterateMatrix is stamped with this value.
const JSONSchemaVersion = 1
// The accepted --output formats. outputFormatText is the runner's default:
// a human-readable text renderer. outputFormatJSON selects the
// machine-readable JSON report. Hoisted to consts so the default, the
// validator, and the dispatcher all reference one source of truth.
const (
outputFormatText = "text"
outputFormatJSON = "json"
)
// Result is one fixture x profile outcome. The JSON tags mirror what
// the diff CI consumes; keep them stable.
type Result struct {
// Fixture is the fixture's name.
Fixture string `json:"fixture"`
// Path is the fixture's path as recorded on the loaded Fixture.
Path string `json:"path"`
// Profile is the name of the profile the fixture ran under.
Profile string `json:"profile"`
// Status is the verdict for this cell.
Status Status `json:"status"`
// DurationMs is the elapsed wall time of the cell in milliseconds.
DurationMs int64 `json:"duration_ms"`
// Message is a short human-readable explanation; omitted when empty.
Message string `json:"message,omitempty"`
// Diff carries the detailed mismatch text; omitted when empty.
Diff string `json:"diff,omitempty"`
}
// Summary is the matrix-level aggregate the JSON output starts with.
type Summary struct {
// Total counts every fixture x profile cell that was run or skipped.
Total int `json:"total"`
// Passed counts cells whose Status is StatusPass.
Passed int `json:"passed"`
// Failed counts cells whose Status is StatusFail.
Failed int `json:"failed"`
// Skipped counts cells whose Status is StatusSkip.
Skipped int `json:"skipped"`
}
// Report is the top-level JSON payload. `schema_version` is the field
// the diff CI keys off, so a downstream consumer can refuse a report
// it does not understand without parsing the rest.
type Report struct {
// SchemaVersion is set to JSONSchemaVersion when the report is built.
SchemaVersion int `json:"schema_version"`
// Summary holds the per-status counters for the whole run.
Summary Summary `json:"summary"`
// Results lists one entry per executed fixture x profile cell, in
// execution order.
Results []Result `json:"results"`
}
// Options bundles the CLI flags the runner needs to do its job. The
// CLI in `conformance/cmd/runner` parses these and hands the Options
// over without further interpretation.
type Options struct {
// FixturesPath points at the directory (or single file) the
// runner loads. Empty defaults to "conformance/fixtures".
FixturesPath string
// Harness carries the engine-binary / connect / stdio settings.
Harness HarnessOptions
// Profiles restricts which profiles the matrix iterates over.
// Empty means "all known profiles".
Profiles []string
// UpdateBaselines overwrites the `expected:` block of every
// fixture with the actual response. Used to bootstrap new
// fixtures. When true, a fixture whose rewrite succeeds is
// reported as PASS without being diffed; a fixture that expects
// an error but gets a success response, or whose rewrite fails,
// is still reported as FAIL.
UpdateBaselines bool
// Output controls the renderer: "text" (default) or "json".
Output string
// Out / Err are the writers the renderer dispatches to.
// Default: os.Stdout / os.Stderr.
Out io.Writer
Err io.Writer
}
// Run drives one pass over the conformance matrix: it normalizes opts, loads
// the fixtures, resolves the profiles, executes every cell, and renders the
// outcome (a JSON report, or a text summary after the streamed per-cell
// lines).
//
// The error result is reserved for runner-internal failures (bad YAML, no
// fixtures, unknown profile, renderer failure, and so on). Failing fixtures
// do not produce an error: they yield a Report with Summary.Failed > 0 and
// a nil error. The CLI maps these to the documented exit codes (1 vs 2);
// callers that want those semantics call ExitCode on the returned Report.
func Run(ctx context.Context, opts Options) (*Report, error) {
	opts, err := prepareOptions(opts)
	if err != nil {
		return nil, err
	}

	loaded, err := LoadDir(opts.FixturesPath)
	switch {
	case err != nil:
		return nil, err
	case len(loaded) == 0:
		return nil, fmt.Errorf("no fixtures found under %s", opts.FixturesPath)
	}

	selected, err := resolveProfiles(opts.Profiles)
	if err != nil {
		return nil, err
	}

	report := iterateMatrix(ctx, loaded, selected, opts)

	switch opts.Output {
	case outputFormatJSON:
		if encErr := writeJSONReport(opts.Out, report); encErr != nil {
			return report, fmt.Errorf("write json report: %w", encErr)
		}
	default:
		writeTextSummary(opts.Out, report)
	}

	if opts.UpdateBaselines {
		// The in-place fixture rewrite happens per cell while the matrix
		// runs; this is only the closing reminder to review the result.
		_, _ = io.WriteString(opts.Err,
			"runner: --update-baselines overwrote `expected:` blocks; review the diff before committing\n")
	}
	return report, nil
}
// prepareOptions fills in the unset fields of Options (Out, Err, Output,
// FixturesPath) and rejects an --output value other than "text" or "json".
// It works on a copy and returns it. Split out of Run so the orchestrator
// stays a short, flat driver.
func prepareOptions(opts Options) (Options, error) {
	if opts.Out == nil {
		opts.Out = os.Stdout
	}
	if opts.Err == nil {
		opts.Err = os.Stderr
	}

	switch opts.Output {
	case "":
		opts.Output = outputFormatText
	case outputFormatText, outputFormatJSON:
		// Already a supported renderer.
	default:
		return opts, fmt.Errorf("unknown --output %q (want text or json)",
			opts.Output)
	}

	if opts.FixturesPath == "" {
		opts.FixturesPath = "conformance/fixtures"
	}
	return opts, nil
}
// iterateMatrix walks the profile x fixture cross product, profiles in the
// outer loop. A fixture runs under a profile only when the fixture lists
// that profile. Each executed cell is appended to the report, counted under
// its status, and - in text mode - written to opts.Out as soon as it
// completes.
func iterateMatrix(ctx context.Context, fixtures []*Fixture, enabled []Profile, opts Options) *Report {
	report := &Report{SchemaVersion: JSONSchemaVersion}
	tally := &report.Summary
	streamText := opts.Output == outputFormatText

	for _, profile := range enabled {
		for _, fixture := range fixtures {
			if !contains(fixture.Profiles, profile.Name) {
				continue
			}
			cell := runOne(ctx, fixture, profile, opts)
			report.Results = append(report.Results, cell)

			tally.Total++
			switch cell.Status {
			case StatusPass:
				tally.Passed++
			case StatusFail:
				tally.Failed++
			case StatusSkip:
				tally.Skipped++
			}

			if streamText {
				writeTextResult(opts.Out, cell)
			}
		}
	}
	return report
}
// ExitCode derives the recommended process exit code from a Report: 2 when
// there is no report at all, 1 when at least one fixture failed, and 0
// otherwise. The CLI calls this directly, which keeps the runner's exit
// semantics unit-testable.
func (r *Report) ExitCode() int {
	switch {
	case r == nil:
		return 2
	case r.Summary.Failed > 0:
		return 1
	default:
		return 0
	}
}
// runOne executes one fixture x profile cell and always returns a populated
// Result with its duration stamped. The status starts as FAIL and stays
// that way for any mismatch or runner-internal error; it becomes SKIP when
// an optional dependency is unavailable.
//
// Steps, in order:
//
//  1. Skip early if an optional dependency is missing.
//  2. Start an emulator for the profile (or attach to the connected one);
//     it is closed when the cell finishes.
//  3. Run the fixture's setup steps against the gateway.
//  4. Send the fixture's query to the gateway.
//  5. Hand the response to the error-mode or row-mode checker, depending
//     on whether the fixture expects an error.
//
// With `--update-baselines` the checkers rewrite the fixture in place from
// the captured response instead of diffing, so a fixture writer can
// bootstrap the expected block without authoring it by hand.
func runOne(ctx context.Context, fx *Fixture, p Profile, opts Options) Result {
	began := time.Now()
	cell := Result{
		Fixture: fx.Name,
		Path:    fx.Path,
		Profile: p.Name,
		Status:  StatusFail,
	}

	if why := optionalDependencySkipReason(fx.OptionalDependencies); why != "" {
		cell.Status = StatusSkip
		cell.Message = why
		return markDuration(cell, began)
	}

	env, err := StartEmulator(ctx, opts.Harness, p)
	if err != nil {
		cell.Message = "start emulator: " + err.Error()
		return markDuration(cell, began)
	}
	defer func() { _ = env.Close() }()

	projectBase := env.BaseURL + "/bigquery/v2/projects/" + fx.ProjectID

	if err := RunSetupSteps(ctx, projectBase, env.dataDir, fx.Setup, fx.DefaultDataset); err != nil {
		cell.Message = err.Error()
		return markDuration(cell, began)
	}

	payload, err := MarshalJobsQueryBody(fx.Query, fx.DefaultDataset, nil)
	if err != nil {
		cell.Message = err.Error()
		return markDuration(cell, began)
	}

	httpStatus, respBody, err := doRequest(ctx, projectBase+"/queries", payload)
	if err != nil {
		cell.Message = "query rpc: " + err.Error()
		return markDuration(cell, began)
	}

	var outcome Result
	if fx.Expected.Error != nil {
		outcome = runErrorPath(fx, opts, cell, httpStatus, respBody)
	} else {
		outcome = runRowPath(fx, opts, cell, httpStatus, respBody)
	}
	return markDuration(outcome, began)
}
// markDuration returns r with DurationMs set to the wall time elapsed since
// started, in milliseconds. It exists so every early return in runOne can
// stamp the duration with a single call.
func markDuration(r Result, started time.Time) Result {
	elapsed := time.Since(started)
	r.DurationMs = elapsed.Milliseconds()
	return r
}
// runErrorPath drives the error-mode branch of a fixture. It expects
// the engine to have failed (non-2xx) and the error envelope to match
// fx.Expected.Error; the --update-baselines mode rewrites the fixture
// in place using the actual response.
//
// The incoming result is returned with Status, Message, and Diff filled in:
//
//   - 2xx response: FAIL with "expected error, got success". Under
//     --update-baselines the success rows are still written to the fixture;
//     if that rewrite fails, the failure is appended to the message instead
//     of being discarded.
//   - non-2xx under --update-baselines: the error envelope is written to
//     the fixture; PASS on success, FAIL with the rewrite error otherwise.
//   - non-2xx otherwise: PASS when the envelope matches, FAIL with the diff
//     when it does not.
func runErrorPath(fx *Fixture, opts Options, result Result, status int, body []byte) Result {
	if status >= 200 && status < 300 {
		result.Message = "expected error, got success"
		result.Diff = fmt.Sprintf("status: %d\nbody: %s",
			status, snippet(body))
		if opts.UpdateBaselines {
			// Record the actual success result as the new
			// baseline (rows) so the fixture writer can flip
			// the assertion mode. A failed rewrite is surfaced so
			// the writer does not assume the fixture changed.
			if err := rewriteFixtureRows(fx, body); err != nil {
				result.Message += "; update-baselines: " + err.Error()
			}
		}
		return result
	}
	if opts.UpdateBaselines {
		if err := rewriteFixtureError(fx, status, body); err != nil {
			result.Message = "update-baselines: " + err.Error()
			return result
		}
		result.Status = StatusPass
		result.Message = "baseline updated"
		return result
	}
	if diff := errorDiff(*fx.Expected.Error, status, body); diff != "" {
		result.Message = "error mismatch"
		result.Diff = diff
		return result
	}
	result.Status = StatusPass
	return result
}
// runRowPath handles the row-mode branch of a fixture. The response must be
// a 2xx carrying a decodable QueryResponse. Under --update-baselines the
// fixture is rewritten from the response; otherwise the rows (or schema,
// depending on the match mode) are diffed against fx.Expected, followed by
// the route assertion. The incoming result is returned with Status,
// Message, and Diff filled in.
func runRowPath(fx *Fixture, opts Options, result Result, status int, body []byte) Result {
	succeeded := status >= 200 && status < 300
	if !succeeded {
		result.Message = fmt.Sprintf("query failed with HTTP %d", status)
		result.Diff = "body: " + snippet(body)
		return result
	}

	var resp bqtypes.QueryResponse
	if decodeErr := json.Unmarshal(body, &resp); decodeErr != nil {
		result.Message = "decode QueryResponse: " + decodeErr.Error()
		result.Diff = "body: " + snippet(body)
		return result
	}

	if opts.UpdateBaselines {
		rewriteErr := rewriteFixtureRows(fx, body)
		if rewriteErr != nil {
			result.Message = "update-baselines: " + rewriteErr.Error()
			return result
		}
		result.Status = StatusPass
		result.Message = "baseline updated"
		return result
	}

	if mismatch := rowDiff(fx.Expected, resp.Schema, resp.Rows); mismatch != "" {
		// The headline depends on which comparison mode was in force.
		headline := "row mismatch"
		switch fx.Expected.Match {
		case MatchSchemaOnly:
			headline = "schema mismatch"
		case MatchUnordered:
			headline = "row multiset mismatch"
		}
		result.Message = headline
		result.Diff = mismatch
		return result
	}

	// The route stays empty when the response carries no query statistics.
	var route string
	if stats := resp.Statistics; stats != nil && stats.Query != nil {
		route = stats.Query.EmulatorRoute
	}
	if mismatch := routeDiff(fx.Expected, route); mismatch != "" {
		result.Message = "route mismatch"
		result.Diff = mismatch
		return result
	}

	result.Status = StatusPass
	return result
}
// routeDiff checks the response's `emulatorRoute` value (actual)
// against the fixture's `expected.route` / `expected.route_allowlist`
// and returns "" on a match. A non-empty result is a diagnostic naming
// both the expected and the actual route, so a fixture writer can see
// the drift without re-running the engine.
//
// The comparison mode is derived from the Expectation:
//
//  1. No assertion: `route` and `route_allowlist` are both empty.
//     Always passes.
//
//  2. Strict: RouteStrictDefault() reports true. When `route` is set,
//     actual must equal it exactly, so an empty actual fails. When
//     only `route_allowlist` is set there is nothing to pin and the
//     check passes.
//
//  3. Relaxed: RouteStrictDefault() reports false. An empty actual is
//     treated as a skip (the route label was not emitted, typically on
//     an error-path fixture); otherwise actual must be `route` or a
//     member of `route_allowlist`.
func routeDiff(expected Expectation, actual string) string {
	pinned := expected.Route != ""
	if !pinned && len(expected.RouteAllowlist) == 0 {
		return ""
	}

	if expected.RouteStrictDefault() {
		if !pinned || actual == expected.Route {
			return ""
		}
		return fmt.Sprintf(
			"expected route: %q\nactual route: %q\n"+
				"(hint: expected.route_strict defaults to true; set "+
				"route_strict: false with a route_allowlist if the "+
				"fixture is genuinely flexible between routes)",
			expected.Route, actual)
	}

	// Relaxed mode from here on. An empty actual means the route
	// label never arrived; pinning a route on such fixtures is
	// documentation, not a hard assertion.
	switch {
	case actual == "":
		return ""
	case pinned && actual == expected.Route:
		return ""
	case slices.Contains(expected.RouteAllowlist, actual):
		return ""
	}

	// Report the pinned route first, followed by the allowlist.
	allowed := make([]string, 0, len(expected.RouteAllowlist)+1)
	if pinned {
		allowed = append(allowed, expected.Route)
	}
	allowed = append(allowed, expected.RouteAllowlist...)
	return fmt.Sprintf(
		"expected route in: [%s]\nactual route: %q",
		strings.Join(allowed, ", "), actual)
}
// resolveProfiles turns the --profile flag values into Profile values.
// An empty list selects every known profile, in whatever order
// KnownProfiles returns them. Otherwise duplicates are dropped, an
// unknown name is an error that lists the known names, and the result
// is sorted by profile name so the matrix order is stable.
func resolveProfiles(names []string) ([]Profile, error) {
	if len(names) == 0 {
		return KnownProfiles(), nil
	}

	picked := make([]Profile, 0, len(names))
	visited := make(map[string]bool, len(names))
	for _, name := range names {
		if visited[name] {
			continue
		}
		visited[name] = true

		profile, found := LookupProfile(name)
		if !found {
			return nil, fmt.Errorf("unknown --profile %q (known: %s)",
				name, strings.Join(profileNames(), ", "))
		}
		picked = append(picked, profile)
	}

	sort.Slice(picked, func(a, b int) bool {
		return picked[a].Name < picked[b].Name
	})
	return picked, nil
}
// contains reports whether needle is an element of haystack. A nil or
// empty haystack never contains anything.
func contains(haystack []string, needle string) bool {
	position := slices.Index(haystack, needle)
	return position >= 0
}
package runner
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"strings"
"syscall"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// SessionOptions configures a session-lane run. Zero values are filled
// in by prepareSessionOptions before the run starts.
type SessionOptions struct {
// SessionsPath is the session file or directory handed to
// LoadSessionDir; empty means DefaultSessionsDir.
SessionsPath string
// IncludeSelfTest is forwarded to LoadSessionDir and controls whether
// "_"-prefixed entries are loaded.
IncludeSelfTest bool
// Harness is passed through to StartEmulator for every session cell.
Harness HarnessOptions
// Profiles lists the profile names to run; empty means all known
// profiles (see resolveProfiles).
Profiles []string
// Output selects the report format (text or json); empty means text.
Output string
// Out receives per-result lines and the final report; nil means
// os.Stdout.
Out io.Writer
// Err defaults to os.Stderr when nil.
Err io.Writer
}
// RunSessions loads every session fixture under opts.SessionsPath and
// runs each one against every enabled profile it opts into, using one
// long-lived engine per session x profile cell. The finished report is
// written to opts.Out as JSON or as a text summary depending on
// opts.Output.
//
// It returns an error when the options are invalid, loading fails, no
// sessions are found, or a profile name is unknown. If only the JSON
// write fails, the report is still returned alongside the error.
func RunSessions(ctx context.Context, opts SessionOptions) (*Report, error) {
	prepared, err := prepareSessionOptions(opts)
	if err != nil {
		return nil, err
	}

	sessions, err := LoadSessionDir(prepared.SessionsPath, prepared.IncludeSelfTest)
	if err != nil {
		return nil, err
	}
	if len(sessions) == 0 {
		return nil, fmt.Errorf("no sessions found under %s", prepared.SessionsPath)
	}

	profiles, err := resolveProfiles(prepared.Profiles)
	if err != nil {
		return nil, err
	}

	report := iterateSessionMatrix(ctx, sessions, profiles, prepared)
	switch prepared.Output {
	case outputFormatJSON:
		if writeErr := writeJSONReport(prepared.Out, report); writeErr != nil {
			return report, fmt.Errorf("write json report: %w", writeErr)
		}
	default:
		writeTextSummary(prepared.Out, report)
	}
	return report, nil
}
// prepareSessionOptions returns opts with defaults applied: Out and Err
// fall back to os.Stdout / os.Stderr, Output to the text format, and
// SessionsPath to DefaultSessionsDir. An Output value that is neither
// text nor json is rejected; in that case the returned options carry
// the writer defaults but not the SessionsPath default.
func prepareSessionOptions(opts SessionOptions) (SessionOptions, error) {
	if opts.Out == nil {
		opts.Out = os.Stdout
	}
	if opts.Err == nil {
		opts.Err = os.Stderr
	}

	if opts.Output == "" {
		opts.Output = outputFormatText
	}
	recognized := opts.Output == outputFormatText || opts.Output == outputFormatJSON
	if !recognized {
		return opts, fmt.Errorf("unknown --output %q (want text or json)", opts.Output)
	}

	if opts.SessionsPath == "" {
		opts.SessionsPath = DefaultSessionsDir
	}
	return opts, nil
}
// iterateSessionMatrix runs the profile x session matrix, profile-major:
// for each enabled profile, every session that lists that profile is
// executed via runSession. Each result is appended to the report,
// tallied into the summary counters, and (in text mode) printed to
// opts.Out as soon as it is available.
func iterateSessionMatrix(ctx context.Context, sessions []*Session, enabled []Profile, opts SessionOptions) *Report {
	report := &Report{SchemaVersion: JSONSchemaVersion}
	streamText := opts.Output == outputFormatText

	for pi := range enabled {
		profile := enabled[pi]
		for _, sess := range sessions {
			// Sessions opt in to profiles; skip cells they did
			// not ask for.
			if !contains(sess.Profiles, profile.Name) {
				continue
			}

			outcome := runSession(ctx, sess, profile, opts)
			report.Results = append(report.Results, outcome)
			report.Summary.Total++
			if outcome.Status == StatusPass {
				report.Summary.Passed++
			} else if outcome.Status == StatusFail {
				report.Summary.Failed++
			} else if outcome.Status == StatusSkip {
				report.Summary.Skipped++
			}

			if streamText {
				writeTextResult(opts.Out, outcome)
			}
		}
	}
	return report
}
// sessionProjectBase builds the project-scoped REST root
// (<BaseURL>/bigquery/v2/projects/<projectID>) for the emulator's
// current BaseURL.
func sessionProjectBase(env *EmulatorEnv, projectID string) string {
	return fmt.Sprintf("%s/bigquery/v2/projects/%s", env.BaseURL, projectID)
}
// runSession executes one session against a freshly started emulator
// for profile p and returns a single Result for the cell. The result
// starts as StatusFail and only becomes StatusPass when every step
// succeeds. A step failure is routed through finishSessionMaybeKnown
// so known_failing sessions are reported as skips; a failure to start
// the emulator is always reported as a plain failure.
func runSession(ctx context.Context, sess *Session, p Profile, opts SessionOptions) Result {
	started := time.Now()

	var result Result
	result.Fixture = sess.Name
	result.Path = sess.Path
	result.Profile = p.Name
	result.Status = StatusFail

	env, startErr := StartEmulator(ctx, opts.Harness, p)
	if startErr != nil {
		result.Message = "start emulator: " + startErr.Error()
		return markDuration(result, started)
	}
	defer func() { _ = env.Close() }()

	for idx := range sess.Steps {
		// Re-read BaseURL each step: RestartEngine replaces the in-process
		// gateway httptest server and updates env.BaseURL.
		base := sessionProjectBase(env, sess.ProjectID)
		label := fmt.Sprintf("[%d]", idx)
		stepErr := executeSessionStep(ctx, env, base, sess.DefaultDataset, sess.Steps[idx], label)
		if stepErr != nil {
			result.Message = stepErr.Error()
			return finishSessionMaybeKnown(result, started, sess.KnownFailing)
		}
	}

	result.Status = StatusPass
	return markDuration(result, started)
}
// executeSessionStep runs one session step (or, for a repeat group,
// its nested steps step.Repeat times) and then evaluates the step's
// attached assertions. indexPrefix identifies the step in error
// messages; nested repeat steps extend it with the iteration and
// nested index.
//
// base is the project-scoped REST root. It is derived from
// env.BaseURL, which the caller documents as changing when the engine
// is restarted, so it is recomputed after a restart step and before
// every nested step of a repeat group. Without that, assertions
// attached to a restart step and any nested steps following a nested
// restart would target the pre-restart gateway address.
func executeSessionStep(
	ctx context.Context,
	env *EmulatorEnv,
	base, defaultDataset string,
	step SessionStep,
	indexPrefix string,
) error {
	if step.Repeat > 0 {
		for n := 0; n < step.Repeat; n++ {
			for j, nested := range step.Steps {
				// An earlier nested step may have restarted the engine.
				base = refreshSessionBase(env, base)
				prefix := fmt.Sprintf("%s.repeat(%d)[%d]", indexPrefix, n, j)
				if err := executeSessionStep(ctx, env, base, defaultDataset, nested, prefix); err != nil {
					return err
				}
			}
		}
		return nil
	}
	kind, err := step.kind()
	if err != nil {
		return fmt.Errorf("%s: %w", indexPrefix, err)
	}
	switch kind {
	case stepKindSetup:
		if err := RunSetupSteps(ctx, base, env.DataDir(), []SetupStep{step.asSetupStep()}, defaultDataset); err != nil {
			return fmt.Errorf("%s: %w", indexPrefix, err)
		}
	case stepKindREST:
		if err := runRESTStep(ctx, base, step.REST); err != nil {
			return fmt.Errorf("%s: %w", indexPrefix, err)
		}
	case stepKindRestart:
		if err := env.RestartEngine(ctx); err != nil {
			return fmt.Errorf("%s restart: %w", indexPrefix, err)
		}
		// Point the assertions below at the restarted gateway.
		base = refreshSessionBase(env, base)
	case stepKindQuery:
		if err := runSessionQueryStep(ctx, base, defaultDataset, step, indexPrefix); err != nil {
			return err
		}
	case stepKindAssertionOnly:
		// No operation; only the assertions below run.
	}
	return runSessionAssertions(ctx, env, base, step, indexPrefix)
}

// refreshSessionBase rebuilds the project-scoped REST root from the
// emulator's current BaseURL, keeping the project id already encoded
// in base. If base carries no project segment it is returned unchanged.
func refreshSessionBase(env *EmulatorEnv, base string) string {
	projectID := projectIDFromBase(base)
	if projectID == "" {
		return base
	}
	return sessionProjectBase(env, projectID)
}
// runSessionQueryStep posts step.Query and checks the outcome. The
// step's own default_dataset overrides the session-level one. When
// expect_error is set the response is compared with errorDiff and the
// row check is skipped; otherwise the response must be 2xx, decode as
// a QueryResponse, and match expect_rows in order. Errors are prefixed
// with indexPrefix.
func runSessionQueryStep(
	ctx context.Context,
	base, defaultDataset string,
	step SessionStep,
	indexPrefix string,
) error {
	dataset := step.DefaultDataset
	if dataset == "" {
		dataset = defaultDataset
	}

	status, body, err := postQueryWithDefaultDataset(ctx, base, step.Query, dataset)
	if err != nil {
		return fmt.Errorf("%s query rpc: %w", indexPrefix, err)
	}

	if wantErr := step.ExpectError; wantErr != nil {
		mismatch := errorDiff(*wantErr, status, body)
		if mismatch == "" {
			return nil
		}
		return fmt.Errorf("%s error mismatch: %s", indexPrefix, mismatch)
	}

	if status < 200 || status >= 300 {
		return fmt.Errorf("%s query failed with HTTP %d: %s",
			indexPrefix, status, snippet(body))
	}

	var resp bqtypes.QueryResponse
	if decodeErr := json.Unmarshal(body, &resp); decodeErr != nil {
		return fmt.Errorf("%s decode QueryResponse: %w", indexPrefix, decodeErr)
	}

	want := Expectation{Match: MatchOrdered, Rows: step.ExpectRows}
	mismatch := rowDiff(want, resp.Schema, resp.Rows)
	if mismatch == "" {
		return nil
	}
	return fmt.Errorf("%s row mismatch: %s", indexPrefix, mismatch)
}
// runSessionAssertions evaluates the assertions attached to a step:
// expect_alive (compared against env.EngineAlive) first, then
// expect_table_list (via assertTableList). Assertions that are unset
// are skipped, and the first failure is returned prefixed with
// indexPrefix.
func runSessionAssertions(
	ctx context.Context,
	env *EmulatorEnv,
	base string,
	step SessionStep,
	indexPrefix string,
) error {
	if wantAlive := step.ExpectAlive; wantAlive != nil {
		alive := env.EngineAlive()
		switch {
		case *wantAlive && !alive:
			return fmt.Errorf(
				"%s expect_alive=true but engine subprocess has exited (signal: aborted or non-zero exit)",
				indexPrefix,
			)
		case !*wantAlive && alive:
			return fmt.Errorf("%s expect_alive=false but engine subprocess is still running", indexPrefix)
		}
	}

	if step.ExpectTableList == nil {
		return nil
	}
	if err := assertTableList(ctx, base, step.ExpectTableList); err != nil {
		return fmt.Errorf("%s %w", indexPrefix, err)
	}
	return nil
}
// runRESTStep issues the generic REST call described by rest. The path
// is resolved against base with resolveRESTURL and a non-nil body is
// sent as JSON. When expect_status is set the response status must
// equal it exactly; otherwise any 2xx status is accepted.
func runRESTStep(ctx context.Context, base string, rest *RESTStep) error {
	target, err := resolveRESTURL(base, rest.Path)
	if err != nil {
		return err
	}

	var payload []byte
	if rest.Body != nil {
		if payload, err = json.Marshal(rest.Body); err != nil {
			return fmt.Errorf("marshal rest body: %w", err)
		}
	}

	status, respBody, err := DoHTTPRequest(ctx, rest.Method, target, payload)
	if err != nil {
		return err
	}

	if expected := rest.ExpectStatus; expected != 0 {
		if status == expected {
			return nil
		}
		return fmt.Errorf("rest %s %s -> %d, want %d: %s",
			rest.Method, rest.Path, status, expected, snippet(respBody))
	}

	if status >= 200 && status < 300 {
		return nil
	}
	return fmt.Errorf("rest %s %s -> %d: %s",
		rest.Method, rest.Path, status, snippet(respBody))
}
// resolveRESTURL turns a REST step's path into a full URL. Surrounding
// whitespace is trimmed first, then:
//
//   - an empty path is an error;
//   - an http:// or https:// path is returned as-is;
//   - a path starting with "/" is appended to the part of base before
//     its first "/bigquery/" segment (or to all of base when there is
//     no such segment);
//   - anything else is treated as relative to base, joined with a
//     single "/".
func resolveRESTURL(base, path string) (string, error) {
	trimmed := strings.TrimSpace(path)
	switch {
	case trimmed == "":
		return "", errors.New("rest path is empty")
	case strings.HasPrefix(trimmed, "http://"), strings.HasPrefix(trimmed, "https://"):
		return trimmed, nil
	case strings.HasPrefix(trimmed, "/"):
		// Host-absolute path: drop base's own /bigquery/... suffix
		// so the prefix is not duplicated.
		if at := strings.Index(base, "/bigquery/"); at >= 0 {
			return base[:at] + trimmed, nil
		}
		return base + trimmed, nil
	}
	return strings.TrimSuffix(base, "/") + "/" + trimmed, nil
}
// assertTableList fetches <base>/datasets/<dataset>/tables and checks
// the returned table ids against exp. A transport error, a non-2xx
// status, an undecodable body, or a contains/not_contains violation is
// returned as an error.
func assertTableList(ctx context.Context, base string, exp *TableListExpect) error {
	endpoint := base + "/datasets/" + exp.Dataset + "/tables"
	status, body, err := DoHTTPRequest(ctx, httpMethodGet, endpoint, nil)
	if err != nil {
		return err
	}
	if ok := status >= 200 && status < 300; !ok {
		return fmt.Errorf("tables.list -> %d: %s", status, snippet(body))
	}

	ids, err := parseTableListIDs(body)
	if err != nil {
		return err
	}

	mismatch := tableListDiff(exp, ids)
	if mismatch == "" {
		return nil
	}
	return fmt.Errorf("expect_table_list: %s", mismatch)
}
// parseTableListIDs extracts tables[].tableReference.tableId from a
// tables.list response body, preserving response order. A body with no
// tables yields an empty, non-nil slice; a body that is not valid JSON
// yields an error.
func parseTableListIDs(body []byte) ([]string, error) {
	type tableRef struct {
		TableID string `json:"tableId"`
	}
	type tableEntry struct {
		TableReference tableRef `json:"tableReference"`
	}
	var payload struct {
		Tables []tableEntry `json:"tables"`
	}

	if err := json.Unmarshal(body, &payload); err != nil {
		return nil, fmt.Errorf("decode tableList: %w", err)
	}

	ids := make([]string, len(payload.Tables))
	for i := range payload.Tables {
		ids[i] = payload.Tables[i].TableReference.TableID
	}
	return ids, nil
}
// tableListDiff compares the actual table ids against exp and returns
// "" when every id in exp.Contains is present and no id in
// exp.NotContains is. Otherwise it returns a "; "-separated diagnostic
// listing the missing ids, the forbidden ids that were found, and the
// full actual list.
func tableListDiff(exp *TableListExpect, actual []string) string {
	present := make(map[string]struct{}, len(actual))
	for _, id := range actual {
		present[id] = struct{}{}
	}

	var missing, forbidden []string
	for _, id := range exp.Contains {
		if _, ok := present[id]; !ok {
			missing = append(missing, id)
		}
	}
	for _, id := range exp.NotContains {
		if _, ok := present[id]; ok {
			forbidden = append(forbidden, id)
		}
	}
	if len(missing)+len(forbidden) == 0 {
		return ""
	}

	sections := make([]string, 0, 3)
	if len(missing) > 0 {
		sections = append(sections, "missing tables: ["+strings.Join(missing, ", ")+"]")
	}
	if len(forbidden) > 0 {
		sections = append(sections, "forbidden tables present: ["+strings.Join(forbidden, ", ")+"]")
	}
	sections = append(sections, "actual table ids: ["+strings.Join(actual, ", ")+"]")
	return strings.Join(sections, "; ")
}
// finishSessionMaybeKnown stamps the duration onto r and, for sessions
// flagged known_failing, downgrades a failure to a skip while keeping
// the original failure text after a "known_failing" prefix. Results
// that are not failures are returned with only the duration added.
func finishSessionMaybeKnown(r Result, started time.Time, knownFailing bool) Result {
	timed := markDuration(r, started)
	if !knownFailing || timed.Status != StatusFail {
		return timed
	}
	timed.Status = StatusSkip
	timed.Message = "known_failing (expected divergence): " + timed.Message
	return timed
}
// EngineAlive reports whether the spawned engine subprocess is still
// running. An env with no subprocess handle (nil receiver, nil cmd, or
// a cmd that was never started) reports true. Otherwise the process
// is considered dead when its recorded ProcessState says it exited or
// when probing it with signal 0 fails.
func (e *EmulatorEnv) EngineAlive() bool {
	if e == nil || e.cmd == nil {
		return true
	}
	proc := e.cmd.Process
	if proc == nil {
		return true
	}
	if state := e.cmd.ProcessState; state != nil && state.Exited() {
		return false
	}
	// Signal 0 delivers nothing; it only checks that the process
	// can still be signalled.
	return proc.Signal(syscall.Signal(0)) == nil
}
package runner
import (
"errors"
"fmt"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"gopkg.in/yaml.v3"
)
// Session is the in-memory shape of a multi-step session YAML file under
// conformance/sessions/. Every step runs against a single long-lived engine
// process unless a restart step stops and relaunches it (same --data_dir).
type Session struct {
// Name is required; it is also the source of the default ProjectID.
Name string `yaml:"name"`
Description string `yaml:"description,omitempty"`
// Profiles lists the profiles this session runs under; when empty,
// normalize fills in defaultProfiles. Every name must be a known profile.
Profiles []string `yaml:"profiles,omitempty"`
// ProjectID defaults to "proj-session-" + sanitizeID(Name) when empty.
ProjectID string `yaml:"project_id,omitempty"`
// DefaultDataset is the dataset used by steps that do not set their own.
DefaultDataset string `yaml:"default_dataset,omitempty"`
// KnownFailing makes a step failure count as a skip rather than a fail.
KnownFailing bool `yaml:"known_failing,omitempty"`
// KnownFailingRef is not read by the loader or runner code shown here;
// presumably a tracking reference for KnownFailing (confirm with fixtures).
KnownFailingRef string `yaml:"known_failing_ref,omitempty"`
// Steps must contain at least one entry.
Steps []SessionStep `yaml:"steps"`
// Path is the file the session was loaded from; set by the loader, never
// read from YAML.
Path string `yaml:"-"`
}
// SessionStep is one ordered operation or assertion in a session. Setup-style
// fields mirror Fixture.Setup; assertion fields may stand alone or attach to
// a query step. A repeat group sets Repeat and nested Steps.
//
// Outside a repeat group a step must set exactly one operation: a setup
// field (Dataset, Table, Rows, SQL), Query, REST, or Restart. A step with
// no operation is valid only if it sets at least one assertion field.
type SessionStep struct {
// Setup operation fields; any one of these makes the step a setup step.
Dataset string `yaml:"dataset,omitempty"`
Table *TableSetup `yaml:"table,omitempty"`
Rows *RowsSetup `yaml:"rows,omitempty"`
SQL string `yaml:"sql,omitempty"`
// Query makes the step a query step; it requires ExpectRows or ExpectError.
Query string `yaml:"query,omitempty"`
// DefaultDataset overrides the session-level default dataset for a query step.
DefaultDataset string `yaml:"default_dataset,omitempty"`
// REST makes the step a generic REST call.
REST *RESTStep `yaml:"rest,omitempty"`
// ExpectRows is compared, in order, against a query step's result rows.
ExpectRows []map[string]any `yaml:"expect_rows,omitempty"`
// ExpectError, when set on a query step, replaces the row check with an
// error-envelope comparison.
ExpectError *ExpectedError `yaml:"expect_error,omitempty"`
// ExpectTableList is checked against tables.list after the operation.
ExpectTableList *TableListExpect `yaml:"expect_table_list,omitempty"`
// ExpectAlive is compared against the engine subprocess liveness after
// the operation.
ExpectAlive *bool `yaml:"expect_alive,omitempty"`
// Repeat > 0 turns the step into a repeat group that runs Steps that
// many times; the step's other fields are then not consulted.
Repeat int `yaml:"repeat,omitempty"`
Steps []SessionStep `yaml:"steps,omitempty"`
// Restart makes the step restart the engine.
Restart bool `yaml:"restart,omitempty"`
}
// RESTStep is a generic gateway REST call (method + path + optional JSON body).
type RESTStep struct {
// Method is the HTTP method; required.
Method string `yaml:"method"`
// Path is required. It may be a full http(s) URL, a host-absolute path
// starting with "/", or a path relative to the project base URL.
Path string `yaml:"path"`
// Body, when non-nil, is marshalled to JSON and sent as the request body.
Body map[string]any `yaml:"body,omitempty"`
// ExpectStatus is the exact status required; zero accepts any 2xx.
ExpectStatus int `yaml:"expect_status,omitempty"`
}
// TableListExpect asserts tables.list contains (or omits) table IDs.
// Dataset is required and at least one of Contains / NotContains must be set.
type TableListExpect struct {
// Dataset is the dataset whose tables are listed.
Dataset string `yaml:"dataset"`
// Contains lists table IDs that must be present.
Contains []string `yaml:"contains,omitempty"`
// NotContains lists table IDs that must be absent.
NotContains []string `yaml:"not_contains,omitempty"`
}
// DefaultSessionsDir is the committed session fixture root. It is the
// path used when SessionOptions.SessionsPath is left empty.
const DefaultSessionsDir = "conformance/sessions"
// LoadSession reads the session YAML file at path, then parses and
// validates it. Read failures are wrapped with the path.
func LoadSession(path string) (*Session, error) {
	raw, readErr := os.ReadFile(path) //nolint:gosec // path is CLI-controlled
	if readErr != nil {
		return nil, fmt.Errorf("read %s: %w", path, readErr)
	}
	return loadSessionBytes(raw, path)
}
// loadSessionBytes decodes session YAML from data, rejecting unknown
// fields, records path on the result, and runs normalize to apply
// defaults and validate. path is used for Session.Path and for error
// messages only.
func loadSessionBytes(data []byte, path string) (*Session, error) {
	decoder := yaml.NewDecoder(strings.NewReader(string(data)))
	decoder.KnownFields(true)

	sess := new(Session)
	if decodeErr := decoder.Decode(sess); decodeErr != nil {
		return nil, fmt.Errorf("parse %s: %w", path, decodeErr)
	}

	sess.Path = path
	if validateErr := sess.normalize(); validateErr != nil {
		return nil, fmt.Errorf("validate %s: %w", path, validateErr)
	}
	return sess, nil
}
// LoadSessionDir walks a directory (or loads a single file) and returns every
// loadable session, sorted by path. Only files with a .yaml or .yml
// extension (case-insensitive) are loaded. Directories and files whose
// basename starts with "_" are skipped unless includeSelfTest is true.
//
// The root passed in is always descended into, even when its own name
// starts with "_". A session that fails to load aborts the walk and is
// returned as the error.
func LoadSessionDir(pathOrDir string, includeSelfTest bool) ([]*Session, error) {
	info, err := os.Stat(pathOrDir)
	if err != nil {
		return nil, fmt.Errorf("stat %s: %w", pathOrDir, err)
	}
	if !info.IsDir() {
		s, err := LoadSession(pathOrDir)
		if err != nil {
			return nil, err
		}
		return []*Session{s}, nil
	}
	var sessions []*Session
	walkErr := filepath.Walk(pathOrDir, func(p string, fi os.FileInfo, visitErr error) error {
		if visitErr != nil {
			return visitErr
		}
		selfTest := strings.HasPrefix(filepath.Base(p), "_")
		if fi.IsDir() {
			// Identify the root by path, not basename: a nested
			// "_x" directory under a root that is itself named
			// "_x" must still be skipped.
			if p != pathOrDir && selfTest && !includeSelfTest {
				return filepath.SkipDir
			}
			return nil
		}
		if selfTest && !includeSelfTest {
			return nil
		}
		ext := strings.ToLower(filepath.Ext(p))
		if ext != ".yaml" && ext != ".yml" {
			return nil
		}
		s, err := LoadSession(p)
		if err != nil {
			return err
		}
		sessions = append(sessions, s)
		return nil
	})
	if walkErr != nil {
		return nil, walkErr
	}
	sort.Slice(sessions, func(i, j int) bool { return sessions[i].Path < sessions[j].Path })
	return sessions, nil
}
// normalize validates a freshly decoded session and fills in defaults.
// It requires a non-blank name and at least one step, derives ProjectID
// from the name when unset, defaults Profiles to defaultProfiles,
// rejects unknown profile names, and validates every step in order,
// returning the first error found.
func (s *Session) normalize() error {
	switch {
	case strings.TrimSpace(s.Name) == "":
		return errors.New("name is required")
	case len(s.Steps) == 0:
		return errors.New("steps must list at least one entry")
	}

	if s.ProjectID == "" {
		s.ProjectID = "proj-session-" + sanitizeID(s.Name)
	}
	if len(s.Profiles) == 0 {
		s.Profiles = append([]string(nil), defaultProfiles...)
	}

	registered := KnownProfiles()
	recognized := make(map[string]struct{}, len(registered))
	for _, profile := range registered {
		recognized[profile.Name] = struct{}{}
	}
	for _, name := range s.Profiles {
		if _, ok := recognized[name]; !ok {
			return fmt.Errorf("unknown profile %q (known: %s)",
				name, strings.Join(profileNames(), ", "))
		}
	}

	for idx := range s.Steps {
		step := s.Steps[idx]
		if err := step.validate(strconv.Itoa(idx)); err != nil {
			return err
		}
	}
	return nil
}
// validate checks one step at load time. indexPrefix is the step's
// position and appears in error messages as steps[<indexPrefix>].
//
// A repeat group must have nested steps, each of which is validated
// recursively; the group's own fields are not inspected further. Any
// other step must resolve to exactly one kind (see kind) and satisfy
// that kind's requirements: a query step needs expect_rows or
// expect_error, an assertion-only step needs at least one assertion,
// and setup / REST steps delegate to their own validators. An attached
// expect_table_list is validated for every kind, including restart
// steps, because the runner evaluates assertions after a restart too.
func (step *SessionStep) validate(indexPrefix string) error {
	if step.Repeat > 0 {
		if len(step.Steps) == 0 {
			return fmt.Errorf("steps[%s]: repeat requires nested steps", indexPrefix)
		}
		for i, nested := range step.Steps {
			if err := nested.validate(fmt.Sprintf("%s.repeat[%d]", indexPrefix, i)); err != nil {
				return err
			}
		}
		return nil
	}
	kind, err := step.kind()
	if err != nil {
		return fmt.Errorf("steps[%s]: %w", indexPrefix, err)
	}
	switch kind {
	case stepKindQuery:
		if len(step.ExpectRows) == 0 && step.ExpectError == nil {
			return fmt.Errorf("steps[%s]: query step requires expect_rows or expect_error", indexPrefix)
		}
	case stepKindAssertionOnly:
		if step.ExpectAlive == nil && step.ExpectTableList == nil && step.ExpectError == nil {
			return fmt.Errorf(
				"steps[%s]: assertion step must set expect_alive, expect_table_list, or expect_error",
				indexPrefix,
			)
		}
	case stepKindSetup:
		if err := step.asSetupStep().validate(); err != nil {
			return fmt.Errorf("steps[%s]: %w", indexPrefix, err)
		}
	case stepKindREST:
		if err := step.REST.validate(); err != nil {
			return fmt.Errorf("steps[%s]: %w", indexPrefix, err)
		}
	case stepKindRestart:
		// Nothing restart-specific to check; do not return early so
		// an attached expect_table_list is still validated below.
	}
	if step.ExpectTableList != nil {
		if err := step.ExpectTableList.validate(); err != nil {
			return fmt.Errorf("steps[%s]: %w", indexPrefix, err)
		}
	}
	return nil
}
// validate requires both rest.method and rest.path to be non-blank.
// The method is checked first, so a step missing both reports the
// method error.
func (r *RESTStep) validate() error {
	switch {
	case strings.TrimSpace(r.Method) == "":
		return errors.New("rest.method is required")
	case strings.TrimSpace(r.Path) == "":
		return errors.New("rest.path is required")
	default:
		return nil
	}
}
// validate requires a dataset and at least one id in contains or
// not_contains. The dataset is checked first.
func (e *TableListExpect) validate() error {
	switch {
	case e.Dataset == "":
		return errors.New("expect_table_list.dataset is required")
	case len(e.Contains)+len(e.NotContains) == 0:
		return errors.New("expect_table_list must set contains and/or not_contains")
	default:
		return nil
	}
}
// sessionStepKind classifies a non-repeat session step by the single
// operation it performs. The value is produced by SessionStep.kind.
type sessionStepKind int
const (
// stepKindSetup: the step sets dataset, table, rows, or sql.
stepKindSetup sessionStepKind = iota
// stepKindQuery: the step sets query.
stepKindQuery
// stepKindREST: the step sets rest.
stepKindREST
// stepKindRestart: the step sets restart.
stepKindRestart
// stepKindAssertionOnly: the step sets no operation, only assertions.
stepKindAssertionOnly
)
// kind classifies a non-repeat step by the operation it sets. Exactly
// one of setup (dataset/table/rows/sql), query, rest, or restart must
// be present; a step with none of them is assertion-only provided it
// sets expect_alive, expect_table_list, or expect_error. Repeat
// groups, empty steps, and steps with more than one operation are
// errors.
func (step *SessionStep) kind() (sessionStepKind, error) {
	if step.Repeat > 0 {
		return 0, errors.New("repeat groups are handled separately")
	}

	hasSetup := step.Dataset != "" ||
		step.Table != nil ||
		step.Rows != nil ||
		strings.TrimSpace(step.SQL) != ""
	hasQuery := strings.TrimSpace(step.Query) != ""

	var operations []sessionStepKind
	if hasSetup {
		operations = append(operations, stepKindSetup)
	}
	if hasQuery {
		operations = append(operations, stepKindQuery)
	}
	if step.REST != nil {
		operations = append(operations, stepKindREST)
	}
	if step.Restart {
		operations = append(operations, stepKindRestart)
	}

	switch len(operations) {
	case 0:
		hasAssertion := step.ExpectAlive != nil ||
			step.ExpectTableList != nil ||
			step.ExpectError != nil
		if hasAssertion {
			return stepKindAssertionOnly, nil
		}
		return 0, errors.New("empty step")
	case 1:
		return operations[0], nil
	default:
		return 0, errors.New("step must set exactly one operation")
	}
}
// asSetupStep projects the setup-style fields of a session step onto a
// SetupStep. Only dataset, table, rows, and sql are carried over; the
// row-access-policy and column-governance fields are left nil.
func (step *SessionStep) asSetupStep() SetupStep {
	var setup SetupStep
	setup.Dataset = step.Dataset
	setup.Table = step.Table
	setup.Rows = step.Rows
	setup.SQL = step.SQL
	setup.RowAccessPolicy = nil
	setup.ColumnGovernance = nil
	return setup
}
package runner
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/external/connectionfixture"
)
// RunSetupSteps runs the setup steps in order against the gateway base
// URL and stops at the first failure, which is returned wrapped with
// the failing step's index. Exported for sub-lanes (differential
// replay, production recorders).
func RunSetupSteps(ctx context.Context, base string, dataDir string, steps []SetupStep, defaultDataset string) error {
	for idx := range steps {
		err := runSetupStep(ctx, base, dataDir, steps[idx], defaultDataset)
		if err == nil {
			continue
		}
		return fmt.Errorf("setup[%d]: %w", idx, err)
	}
	return nil
}
// ValidateExported runs the package-private setup-step validation and
// returns its result unchanged, so corpus loaders outside the package
// can validate steps.
func (s SetupStep) ValidateExported() error {
	err := s.validate()
	return err
}
// runSetupStep executes one setup step by handing it to the helper for
// the first populated field, checked in this order: dataset, table,
// rows, sql, row access policy, column governance, connection fixture.
// Helper errors are returned unchanged; the caller adds the step index.
func runSetupStep(ctx context.Context, base string, dataDir string, step SetupStep, defaultDataset string) error {
	if step.Dataset != "" {
		return setupDataset(ctx, base, step.Dataset)
	}
	if step.Table != nil {
		return setupTable(ctx, base, step.Table)
	}
	if step.Rows != nil {
		return setupRows(ctx, base, step.Rows)
	}
	if strings.TrimSpace(step.SQL) != "" {
		return setupSQL(ctx, base, step.SQL, defaultDataset)
	}
	if step.RowAccessPolicy != nil {
		return setupRowAccessPolicy(ctx, base, step.RowAccessPolicy)
	}
	if step.ColumnGovernance != nil {
		return setupColumnGovernance(ctx, base, step.ColumnGovernance)
	}
	if step.ConnectionFixture != nil {
		return setupConnectionFixture(dataDir, step.ConnectionFixture)
	}
	return errors.New("empty setup step (validated at load time)")
}
// setupConnectionFixture copies a connection fixture tree into the
// emulator's data directory via connectionfixture.CopyTree. A relative
// source directory is resolved against repoRoot(). An empty dataDir is
// rejected.
func setupConnectionFixture(dataDir string, cf *ConnectionFixtureSetup) error {
	if dataDir == "" {
		return errors.New("connection_fixture requires a spawned emulator data_dir")
	}
	source := cf.SourceDir
	if relative := !filepath.IsAbs(source); relative {
		source = filepath.Join(repoRoot(), source)
	}
	return connectionfixture.CopyTree(dataDir, cf.ConnectionID, source)
}
// repoRoot returns the directory that relative fixture paths are
// resolved against. Despite the name it is simply the process working
// directory, falling back to "." when that cannot be determined.
func repoRoot() string {
	if wd, err := os.Getwd(); err == nil {
		return wd
	}
	return "."
}
// setupDataset issues a `datasets.insert` for the synthesized
// fixture project / dataset pair. Location is hardcoded to US to
// match the gateway's default; fixtures that want a different
// location have to use a SQL setup step.
//
// The request body is produced with json.Marshal rather than string
// formatting so a dataset or project id containing a quote or
// backslash is escaped instead of producing malformed JSON.
func setupDataset(ctx context.Context, base, dataset string) error {
	type datasetReference struct {
		ProjectID string `json:"projectId"`
		DatasetID string `json:"datasetId"`
	}
	payload := struct {
		DatasetReference datasetReference `json:"datasetReference"`
		Location         string           `json:"location"`
	}{
		DatasetReference: datasetReference{
			ProjectID: projectIDFromBase(base),
			DatasetID: dataset,
		},
		Location: "US",
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("marshal dataset body: %w", err)
	}
	status, respBody, err := doRequest(ctx, base+"/datasets", body)
	if err != nil {
		return err
	}
	if status < 200 || status >= 300 {
		return fmt.Errorf("datasets.insert -> %d: %s", status, snippet(respBody))
	}
	return nil
}
// setupTable issues a `tables.insert` for the fixture table. The body
// always carries the table reference; the external data configuration,
// view definition, and schema are included only when the fixture sets
// them. Schema columns are converted with columnToTableField, which
// handles nested STRUCT children.
func setupTable(ctx context.Context, base string, t *TableSetup) error {
	type tableSchema struct {
		Fields []bqtypes.TableFieldSchema `json:"fields"`
	}
	var payload struct {
		TableReference            bqtypes.TableReference             `json:"tableReference"`
		Schema                    *tableSchema                       `json:"schema,omitempty"`
		ExternalDataConfiguration *bqtypes.ExternalDataConfiguration `json:"externalDataConfiguration,omitempty"`
		View                      *bqtypes.ViewDefinition            `json:"view,omitempty"`
	}

	payload.TableReference = bqtypes.TableReference{
		ProjectID: projectIDFromBase(base),
		DatasetID: t.Dataset,
		TableID:   t.ID,
	}
	if ext := t.External; ext != nil {
		payload.ExternalDataConfiguration = &bqtypes.ExternalDataConfiguration{
			SourceFormat: ext.SourceFormat,
			SourceURIs:   append([]string(nil), ext.SourceURIs...),
			Autodetect:   ext.Autodetect,
		}
	}
	if view := t.View; view != nil {
		payload.View = &bqtypes.ViewDefinition{Query: view.Query}
	}
	if columns := len(t.Schema); columns > 0 {
		fields := make([]bqtypes.TableFieldSchema, 0, columns)
		for _, column := range t.Schema {
			fields = append(fields, columnToTableField(column))
		}
		payload.Schema = &tableSchema{Fields: fields}
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("marshal table body: %w", err)
	}

	endpoint := fmt.Sprintf("%s/datasets/%s/tables", base, t.Dataset)
	status, respBody, err := doRequest(ctx, endpoint, encoded)
	switch {
	case err != nil:
		return err
	case status < 200 || status >= 300:
		return fmt.Errorf("tables.insert -> %d: %s", status, snippet(respBody))
	}
	return nil
}
// setupRows seeds rows with a `tabledata.insertAll` call. Per the
// original note, this is the only way to seed rows on the DuckDB
// engine today because INSERT VALUES returns UNIMPLEMENTED. The wire
// shape follows Google's REST API spec: each row is wrapped in
// `{json: {...}}`.
func setupRows(ctx context.Context, base string, rs *RowsSetup) error {
	type wrappedRow struct {
		JSON map[string]any `json:"json"`
	}
	type insertAllRequest struct {
		Kind string       `json:"kind"`
		Rows []wrappedRow `json:"rows"`
	}

	request := insertAllRequest{
		Kind: "bigquery#tableDataInsertAllRequest",
		Rows: make([]wrappedRow, len(rs.Rows)),
	}
	for i := range rs.Rows {
		request.Rows[i].JSON = rs.Rows[i]
	}

	encoded, err := json.Marshal(request)
	if err != nil {
		return fmt.Errorf("marshal insertAll body: %w", err)
	}

	endpoint := fmt.Sprintf("%s/datasets/%s/tables/%s/insertAll",
		base, rs.Dataset, rs.Table)
	status, respBody, err := doRequest(ctx, endpoint, encoded)
	switch {
	case err != nil:
		return err
	case status < 200 || status >= 300:
		return fmt.Errorf("tabledata.insertAll -> %d: %s",
			status, snippet(respBody))
	}
	return nil
}
// setupSQL posts an arbitrary statement to the gateway's `/queries`
// endpoint, with defaultDataset folded into the request body by
// marshalJobsQueryBody. It covers setup phases that do not fit the
// dataset/table/rows shape (e.g. preparing a temp UDF). Any non-2xx
// response is an error.
func setupSQL(ctx context.Context, base, sql, defaultDataset string) error {
	payload, err := marshalJobsQueryBody(sql, defaultDataset)
	if err != nil {
		return err
	}

	status, respBody, err := doRequest(ctx, base+"/queries", payload)
	switch {
	case err != nil:
		return err
	case status < 200 || status >= 300:
		return fmt.Errorf("setup sql -> %d: %s", status, snippet(respBody))
	}
	return nil
}
// columnToTableField converts a YAML-decoded SchemaColumn into the
// `bqtypes.TableFieldSchema` wire shape. Policy tags are copied into a
// fresh slice and only attached when present; child columns are
// converted recursively so nested STRUCT fields are preserved.
func columnToTableField(c SchemaColumn) bqtypes.TableFieldSchema {
	var field bqtypes.TableFieldSchema
	field.Name = c.Name
	field.Type = c.Type
	field.Mode = c.Mode
	field.Description = c.Description

	if count := len(c.PolicyTags); count > 0 {
		names := make([]string, count)
		copy(names, c.PolicyTags)
		field.PolicyTags = &bqtypes.PolicyTagList{Names: names}
	}

	for i := range c.Fields {
		field.Fields = append(field.Fields, columnToTableField(c.Fields[i]))
	}
	return field
}
// setupRowAccessPolicy creates a row access policy on the fixture
// table by posting to the table's rowAccessPolicies collection. The
// project id is taken from base; grantees are only included in the
// body when the fixture lists any.
func setupRowAccessPolicy(ctx context.Context, base string, rap *RowAccessPolicySetup) error {
	reference := map[string]string{
		"projectId": projectIDFromBase(base),
		"datasetId": rap.Dataset,
		"tableId":   rap.Table,
		"policyId":  rap.PolicyID,
	}
	payload := map[string]any{
		"rowAccessPolicyReference": reference,
		"filterPredicate":          rap.FilterPredicate,
	}
	if len(rap.Grantees) > 0 {
		payload["grantees"] = rap.Grantees
	}

	encoded, err := json.Marshal(payload)
	if err != nil {
		return fmt.Errorf("marshal row access policy: %w", err)
	}

	endpoint := fmt.Sprintf("%s/datasets/%s/tables/%s/rowAccessPolicies", base, rap.Dataset, rap.Table)
	status, respBody, err := doRequest(ctx, endpoint, encoded)
	switch {
	case err != nil:
		return err
	case status < 200 || status >= 300:
		return fmt.Errorf("rowAccessPolicies.insert -> %d: %s", status, snippet(respBody))
	}
	return nil
}
// setupColumnGovernance patches the fixture table with a one-field
// schema that sets the column's mask kind and, when given, a policy
// tag. The field type sent in the patch is always "STRING".
func setupColumnGovernance(ctx context.Context, base string, cg *ColumnGovernanceSetup) error {
	column := map[string]any{
		"name":     cg.Column,
		"type":     "STRING",
		"maskKind": cg.MaskKind,
	}
	if tag := cg.PolicyTag; tag != "" {
		column["policyTags"] = map[string]any{"names": []string{tag}}
	}

	encoded, err := json.Marshal(map[string]any{
		"schema": map[string]any{"fields": []map[string]any{column}},
	})
	if err != nil {
		return fmt.Errorf("marshal column governance patch: %w", err)
	}

	endpoint := fmt.Sprintf("%s/datasets/%s/tables/%s", base, cg.Dataset, cg.Table)
	status, respBody, err := doPatchRequest(ctx, endpoint, encoded)
	switch {
	case err != nil:
		return err
	case status < 200 || status >= 300:
		return fmt.Errorf("tables.patch column governance -> %d: %s", status, snippet(respBody))
	}
	return nil
}
// projectIDFromBase returns everything after the last "/projects/" in
// base, which for a URL of the form .../bigquery/v2/projects/<projectId>
// is the project id. It returns "" when base has no such segment. The
// setup helpers use it so they do not need a separate projectId
// parameter.
func projectIDFromBase(base string) string {
	const marker = "/projects/"
	if at := strings.LastIndex(base, marker); at >= 0 {
		return base[at+len(marker):]
	}
	return ""
}
package bqtypes
import "fmt"
// EncryptionConfiguration is the BigQuery REST encryptionConfiguration
// sub-object on tables and load-job destinationEncryptionConfiguration.
// The emulator stores kmsKeyName as opaque metadata only; it does not
// call Cloud KMS.
type EncryptionConfiguration struct {
// KMSKeyName is the KMS crypto key resource name; omitted from JSON
// when empty.
KMSKeyName string `json:"kmsKeyName,omitempty"`
}
// EmulatorCMEKKeyUSCentral builds a deterministic KMS crypto key
// resource name in us-central1 under the "emulator" key ring, for
// regional CMEK samples (matches bqtestutil.EmulatorCMEKKeyUSCentral).
func EmulatorCMEKKeyUSCentral(projectID, cryptoKeyID string) string {
	const pattern = "projects/%s/locations/us-central1/keyRings/emulator/cryptoKeys/%s"
	return fmt.Sprintf(pattern, projectID, cryptoKeyID)
}
package bqtypes
import (
"bytes"
"encoding/json"
"fmt"
"strconv"
"strings"
)
const (
parameterTypeStruct = "STRUCT"
parameterTypeArray = "ARRAY"
)
// UnmarshalJSON decodes a BigQuery REST parameter value. `value` may
// arrive as a JSON string, number, or bool and is normalized by
// normalizeParameterScalarJSON into the form the engine expects.
// Elements of `arrayValues` and fields of `structValues` are decoded
// recursively; an entry that is not a parameter-value object is
// treated as a bare scalar and normalized the same way. Fields absent
// from the input are left untouched on v.
func (v *QueryParameterValue) UnmarshalJSON(data []byte) error {
	var wire struct {
		Value        json.RawMessage            `json:"value"`
		ArrayValues  []json.RawMessage          `json:"arrayValues"`
		StructValues map[string]json.RawMessage `json:"structValues"`
	}
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}

	// decodeNested decodes one array element or struct field,
	// falling back to scalar normalization when it is not an object
	// of the expected shape.
	decodeNested := func(msg json.RawMessage) QueryParameterValue {
		var nested QueryParameterValue
		if err := json.Unmarshal(msg, &nested); err != nil {
			nested.Value = normalizeParameterScalarJSON(msg)
		}
		return nested
	}

	if wire.Value != nil {
		v.Value = normalizeParameterScalarJSON(wire.Value)
	}

	if count := len(wire.ArrayValues); count > 0 {
		elements := make([]QueryParameterValue, 0, count)
		for _, msg := range wire.ArrayValues {
			elements = append(elements, decodeNested(msg))
		}
		v.ArrayValues = elements
	}

	if count := len(wire.StructValues); count > 0 {
		fields := make(map[string]QueryParameterValue, count)
		for name, msg := range wire.StructValues {
			fields[name] = decodeNested(msg)
		}
		v.StructValues = fields
	}
	return nil
}
// ValueJSON returns the JSON literal string forwarded to the engine as
// `value_json`. A nil receiver yields "". A value with array elements is
// rendered as a JSON array, one with struct fields as a JSON object, and
// anything else returns the normalized scalar `value` unchanged. If the
// composite cannot be marshaled the result is "[]" or "{}" respectively.
func (v *QueryParameterValue) ValueJSON() string {
	switch {
	case v == nil:
		return ""
	case len(v.ArrayValues) > 0:
		items := make([]json.RawMessage, len(v.ArrayValues))
		for i := range v.ArrayValues {
			items[i] = json.RawMessage(v.ArrayValues[i].marshalParameterJSON())
		}
		encoded, err := json.Marshal(items)
		if err != nil {
			return "[]"
		}
		return string(encoded)
	case len(v.StructValues) > 0:
		members := make(map[string]json.RawMessage, len(v.StructValues))
		for key, member := range v.StructValues {
			members[key] = json.RawMessage(member.marshalParameterJSON())
		}
		encoded, err := json.Marshal(members)
		if err != nil {
			return "{}"
		}
		return string(encoded)
	default:
		return v.Value
	}
}
// ParameterTypeWire returns the engine `type_kind` and the optional
// type descriptor for a REST query parameter type.
//
//   - nil type: ("", "").
//   - STRUCT: kind "STRUCT" and a comma-separated "name:kind" list of its
//     fields (empty when no struct types are declared). Only each field's
//     kind is used; nested descriptors are dropped.
//   - ARRAY: kind "ARRAY" and the element kind, or "STRUCT:<fields>" when
//     the element is itself a STRUCT (empty when no element type is set).
//   - anything else: the type name itself with an empty descriptor.
func ParameterTypeWire(t *QueryParameterType) (typeKind, typeJSON string) {
	if t == nil {
		return "", ""
	}
	if t.Type == parameterTypeStruct {
		var desc strings.Builder
		for i, st := range t.StructTypes {
			if i > 0 {
				desc.WriteByte(',')
			}
			fieldKind, _ := ParameterTypeWire(&st.Type)
			desc.WriteString(st.Name)
			desc.WriteByte(':')
			desc.WriteString(fieldKind)
		}
		return parameterTypeStruct, desc.String()
	}
	if t.Type == parameterTypeArray {
		if t.ArrayType == nil {
			return parameterTypeArray, ""
		}
		elemKind, elemDesc := ParameterTypeWire(t.ArrayType)
		if elemKind != parameterTypeStruct {
			return parameterTypeArray, elemKind
		}
		return parameterTypeArray, parameterTypeStruct + ":" + elemDesc
	}
	return t.Type, ""
}
// ParameterValueWire returns the JSON literal forwarded as engine
// `value_json`. When pt is a STRUCT with declared struct types and v
// carries struct values, the fields are emitted as a positional JSON
// array in the order of `parameterType.structTypes`; a field missing
// from v is encoded as null. Every other combination defers to
// v.ValueJSON. A nil v yields "".
func ParameterValueWire(pt *QueryParameterType, v *QueryParameterValue) string {
	if v == nil {
		return ""
	}
	positional := pt != nil &&
		pt.Type == parameterTypeStruct &&
		len(pt.StructTypes) > 0 &&
		len(v.StructValues) > 0
	if !positional {
		return v.ValueJSON()
	}
	ordered := make([]json.RawMessage, len(pt.StructTypes))
	for i, st := range pt.StructTypes {
		ordered[i] = json.RawMessage(v.StructValues[st.Name].marshalParameterJSON())
	}
	encoded, err := json.Marshal(ordered)
	if err != nil {
		return "[]"
	}
	return string(encoded)
}
// marshalParameterJSON renders one nested parameter value as a JSON
// token for embedding in an ARRAY / STRUCT `value_json` payload.
//
//   - A value with array or struct children is marshaled as a whole.
//   - An empty scalar becomes null.
//   - A scalar whose text is itself valid JSON (numbers, true/false, ...)
//     is re-emitted as that JSON value, so "42" becomes 42. Numbers are
//     decoded as json.Number rather than float64 so that large INT64 /
//     NUMERIC literals keep every digit.
//   - Any other text is emitted as a JSON string.
//
// Any marshal failure degrades to null.
func (v QueryParameterValue) marshalParameterJSON() []byte {
	if len(v.ArrayValues) > 0 || len(v.StructValues) > 0 {
		raw, err := json.Marshal(v)
		if err != nil {
			return []byte("null")
		}
		return raw
	}
	if v.Value == "" {
		return []byte("null")
	}
	// json.Valid guarantees the text is exactly one JSON value, so the
	// decoder below cannot silently ignore trailing data.
	if json.Valid([]byte(v.Value)) {
		dec := json.NewDecoder(strings.NewReader(v.Value))
		dec.UseNumber()
		var decoded any
		if err := dec.Decode(&decoded); err == nil {
			if raw, err := json.Marshal(decoded); err == nil {
				return raw
			}
		}
	}
	// Plain text: encode with the JSON string rules. strconv.Quote would
	// emit Go-only escapes such as \x01 or \a that JSON parsers reject.
	raw, err := json.Marshal(v.Value)
	if err != nil {
		return []byte("null")
	}
	return raw
}
// normalizeParameterScalarJSON converts a JSON scalar token into the
// decimal-string form the C++ parameter parser expects. Empty input and
// JSON null become "". The token is then tried, in order, as a string
// (returned unquoted), a bool ("true"/"false"), an int64 (base 10) and a
// float64 (plain decimal notation, no exponent). A token that matches
// none of these is returned as its trimmed text.
//
// NOTE(review): numbers that do not fit int64 go through float64, so
// literals with more than ~17 significant digits are rounded.
func normalizeParameterScalarJSON(raw json.RawMessage) string {
	token := bytes.TrimSpace(raw)
	if len(token) == 0 || string(token) == "null" {
		return ""
	}
	var (
		text     string
		flag     bool
		integer  int64
		floating float64
	)
	switch {
	case json.Unmarshal(token, &text) == nil:
		return text
	case json.Unmarshal(token, &flag) == nil:
		return strconv.FormatBool(flag)
	case json.Unmarshal(token, &integer) == nil:
		return strconv.FormatInt(integer, 10)
	case json.Unmarshal(token, &floating) == nil:
		return strconv.FormatFloat(floating, 'f', -1, 64)
	default:
		return string(token)
	}
}
// ParseQueryParameters unmarshals a queryParameters JSON array, used
// by unit tests and any handler that decodes parameters outside the
// main QueryRequest body.
func ParseQueryParameters(data []byte) ([]QueryParameter, error) {
var params []QueryParameter
if len(data) == 0 {
return nil, nil
}
if err := json.Unmarshal(data, ¶ms); err != nil {
return nil, fmt.Errorf("parse queryParameters: %w", err)
}
return params, nil
}
package bqtypes
import (
"bytes"
"encoding/json"
"fmt"
"strconv"
)
// RangePartitioning describes BigQuery integer-range partitioning: a
// `field` name plus the `range` bucket layout. Only integer buckets
// (`start`, `end`, `interval`) are modeled, and the value is just
// round-tripped for now.
type RangePartitioning struct {
// Field is serialized as `field` and omitted when empty.
Field string `json:"field,omitempty"`
// Range is optional; a nil pointer omits `range` from the output.
Range *RangePartSpec `json:"range,omitempty"`
}
// RangePartSpec is the `range` sub-object of RangePartitioning. All
// three integer fields are wire-serialized as decimal strings to mirror
// BigQuery REST. See docs/bigquery/docs/reference/rest/v2/tables/get.md.
// On input, RangePartSpec.UnmarshalJSON also accepts JSON numbers and
// stores them as base-10 strings.
type RangePartSpec struct {
Start string `json:"start,omitempty"`
End string `json:"end,omitempty"`
Interval string `json:"interval,omitempty"`
}
// UnmarshalJSON decodes `start`, `end` and `interval`, each of which may
// be a JSON string or a JSON number (the Node client posts range.start
// as a number on tables.insert). Fields are processed in that order and
// the first failure is returned prefixed with the field name.
func (r *RangePartSpec) UnmarshalJSON(data []byte) error {
	var wire struct {
		Start    json.RawMessage `json:"start,omitempty"`
		End      json.RawMessage `json:"end,omitempty"`
		Interval json.RawMessage `json:"interval,omitempty"`
	}
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}
	bindings := []struct {
		label string
		src   json.RawMessage
		dst   *string
	}{
		{"start", wire.Start, &r.Start},
		{"end", wire.End, &r.End},
		{"interval", wire.Interval, &r.Interval},
	}
	for _, b := range bindings {
		text, err := unmarshalRangeIntString(b.src)
		// Store the result even on failure (it is "" then), as a direct
		// multi-assignment into the field would.
		*b.dst = text
		if err != nil {
			return fmt.Errorf("%s: %w", b.label, err)
		}
	}
	return nil
}
// unmarshalRangeIntString converts one raw range bound into its string
// form. Absent, empty and null tokens yield "". A quoted token is
// unquoted and returned as-is without checking that it is numeric. Any
// other token must be a JSON number that fits int64 and is returned in
// base 10; fractional or out-of-range numbers produce an error.
func unmarshalRangeIntString(raw json.RawMessage) (string, error) {
	token := bytes.TrimSpace(raw)
	switch {
	case len(token) == 0, string(token) == "null":
		return "", nil
	case token[0] == '"':
		var text string
		if err := json.Unmarshal(token, &text); err != nil {
			return "", err
		}
		return text, nil
	}
	var num json.Number
	if err := json.Unmarshal(token, &num); err != nil {
		return "", err
	}
	whole, err := num.Int64()
	if err != nil {
		return "", err
	}
	return strconv.FormatInt(whole, 10), nil
}
package bqtypes
import (
"encoding/json"
"fmt"
)
// RoutineLanguage is the routine language on the wire. Gapic v2 REST may
// send the enum as a string ("SQL") or as a numeric proto enum (1).
type RoutineLanguage string

// routineLanguageFromNumeric maps a numeric language enum value (1-5) to
// its string name. The boolean is false for any other value.
func routineLanguageFromNumeric(n int) (RoutineLanguage, bool) {
	names := [...]RoutineLanguage{
		1: "SQL",
		2: "JAVASCRIPT",
		3: "PYTHON",
		4: "JAVA",
		5: "SCALA",
	}
	if n < 1 || n >= len(names) {
		return "", false
	}
	return names[n], true
}
// UnmarshalJSON accepts string enum names or numeric gapic v2 values.
// A JSON null clears the value. Strings are stored verbatim without
// validation; numbers are converted to int and must map to a known
// language, otherwise an error is returned. Any other JSON type is
// rejected.
func (l *RoutineLanguage) UnmarshalJSON(data []byte) error {
	if string(data) == jsonNullLiteral {
		*l = ""
		return nil
	}
	var decoded any
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	if name, isString := decoded.(string); isString {
		*l = RoutineLanguage(name)
		return nil
	}
	num, isNumber := decoded.(float64)
	if !isNumber {
		return fmt.Errorf("bqtypes: language must be string or number, got %T", decoded)
	}
	code := int(num)
	lang, known := routineLanguageFromNumeric(code)
	if !known {
		return fmt.Errorf("bqtypes: unknown language enum value %d", code)
	}
	*l = lang
	return nil
}
package bqtypes
import (
"encoding/json"
"fmt"
)
// RoutineReference is a stable handle to a routine (UDF / TVF / procedure).
// All three IDs are always serialized (no `omitempty`).
type RoutineReference struct {
ProjectID string `json:"projectId"`
DatasetID string `json:"datasetId"`
RoutineID string `json:"routineId"`
}
// StandardSqlDataType mirrors the BigQuery REST StandardSqlDataType
// resource. See docs/bigquery/docs/reference/rest/v2/StandardSqlDataType.md.
// The type is recursive: the optional array, struct and range members
// carry nested type descriptions and are omitted from JSON when nil.
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlDataType struct {
// TypeKind is always serialized; on input it may be an enum name or a
// numeric enum value (see SQLTypeKind).
TypeKind SQLTypeKind `json:"typeKind"`
ArrayElementType *StandardSqlDataType `json:"arrayElementType,omitempty"`
StructType *StandardSqlStructType `json:"structType,omitempty"`
RangeElementType *StandardSqlDataType `json:"rangeElementType,omitempty"`
}
// StandardSqlStructType is the struct sub-object of StandardSqlDataType.
// It lists the struct's fields in order; the list is omitted when empty.
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlStructType struct {
Fields []StandardSqlField `json:"fields,omitempty"`
}
// StandardSqlField is one field of a STRUCT type; it is also the element
// type of StandardSqlTableType.Columns. Both members are always
// serialized (no `omitempty`).
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlField struct {
Name string `json:"name"`
Type StandardSqlDataType `json:"type"`
}
// StandardSqlTableType describes a table shape as an ordered list of
// columns. It is the type of Routine.ReturnTableType.
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlTableType struct {
Columns []StandardSqlField `json:"columns,omitempty"`
}
// RoutineArgument is an input/output argument of a routine. Every member
// is optional on the wire and omitted when empty.
type RoutineArgument struct {
Name string `json:"name,omitempty"`
// ArgumentKind and Mode are carried as plain strings; no validation is
// applied by this type.
ArgumentKind string `json:"argumentKind,omitempty"`
Mode string `json:"mode,omitempty"`
DataType *StandardSqlDataType `json:"dataType,omitempty"`
}
// RoutineType is the fine-grained routine kind on the wire. Gapic v2
// REST may send the enum as a string ("SCALAR_FUNCTION") or as a
// numeric proto enum (1 = SCALAR_FUNCTION, 2 = PROCEDURE, …).
type RoutineType string

// routineTypeFromNumeric maps a numeric RoutineType enum value to its
// string name, following the BigQuery v2 Routine.RoutineType numbering
// (1-4). The boolean is false for 0 (unspecified) and any unknown value.
func routineTypeFromNumeric(n int) (RoutineType, bool) {
	switch n {
	case 1:
		return "SCALAR_FUNCTION", true
	case 2:
		return "PROCEDURE", true
	case 3:
		return "TABLE_VALUED_FUNCTION", true
	case 4:
		return "AGGREGATE_FUNCTION", true
	default:
		return "", false
	}
}
// UnmarshalJSON accepts string enum names or numeric gapic v2 values.
// A JSON null clears the value. Strings are stored verbatim without
// validation; numbers are converted to int and must map to a known
// routine type, otherwise an error is returned. Any other JSON type is
// rejected.
func (t *RoutineType) UnmarshalJSON(data []byte) error {
	if string(data) == jsonNullLiteral {
		*t = ""
		return nil
	}
	var decoded any
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	if name, isString := decoded.(string); isString {
		*t = RoutineType(name)
		return nil
	}
	num, isNumber := decoded.(float64)
	if !isNumber {
		return fmt.Errorf("bqtypes: routineType must be string or number, got %T", decoded)
	}
	code := int(num)
	kind, known := routineTypeFromNumeric(code)
	if !known {
		return fmt.Errorf("bqtypes: unknown routineType enum value %d", code)
	}
	*t = kind
	return nil
}
// Routine is the BigQuery Routine resource (subset).
// See docs/bigquery/docs/reference/rest/v2/routines.md.
// Only RoutineReference is always serialized; every other member is
// omitted from JSON when empty or nil.
type Routine struct {
Etag string `json:"etag,omitempty"`
RoutineReference RoutineReference `json:"routineReference"`
// RoutineType and Language accept enum names or numeric enum values on
// input via their own UnmarshalJSON methods.
RoutineType RoutineType `json:"routineType,omitempty"`
CreationTime string `json:"creationTime,omitempty"`
LastModifiedTime string `json:"lastModifiedTime,omitempty"`
Language RoutineLanguage `json:"language,omitempty"`
Arguments []RoutineArgument `json:"arguments,omitempty"`
ReturnType *StandardSqlDataType `json:"returnType,omitempty"`
ReturnTableType *StandardSqlTableType `json:"returnTableType,omitempty"`
DefinitionBody string `json:"definitionBody,omitempty"`
Description string `json:"description,omitempty"`
// StrictMode is a pointer so an explicit false can be told apart from
// an absent field.
StrictMode *bool `json:"strictMode,omitempty"`
PythonOptions *PythonOptions `json:"pythonOptions,omitempty"`
}
// PythonOptions mirrors the BigQuery REST PythonOptions resource for
// LANGUAGE python routines. Both members are omitted from JSON when
// empty.
type PythonOptions struct {
EntryPoint string `json:"entryPoint,omitempty"`
Packages []string `json:"packages,omitempty"`
}
package bqtypes
// ApplyDefaultCollationToStringFields stamps `collation` onto STRING (and
// STRING-like) schema fields, including nested sub-fields, when the table
// carries a `defaultCollation` and the field does not already specify
// one. The input is returned unchanged when schema is nil, has no fields,
// or defaultCollation is empty; otherwise a copy with a fresh field slice
// is returned and the input is left untouched.
func ApplyDefaultCollationToStringFields(schema *TableSchema, defaultCollation string) *TableSchema {
	switch {
	case schema == nil, defaultCollation == "", len(schema.Fields) == 0:
		return schema
	}
	stamped := make([]TableFieldSchema, 0, len(schema.Fields))
	for _, field := range schema.Fields {
		stamped = append(stamped, applyDefaultCollationField(field, defaultCollation))
	}
	clone := *schema
	clone.Fields = stamped
	return &clone
}
// applyDefaultCollationField returns a copy of f in which the collation
// is set to defaultCollation if f has none and its type is string-like.
// Sub-fields are processed recursively into a newly allocated slice, so
// the caller's field tree is not modified.
func applyDefaultCollationField(f TableFieldSchema, defaultCollation string) TableFieldSchema {
	result := f
	if needsCollation := f.Collation == "" && isStringLikeFieldType(f.Type); needsCollation {
		result.Collation = defaultCollation
	}
	if len(f.Fields) == 0 {
		return result
	}
	children := make([]TableFieldSchema, 0, len(f.Fields))
	for _, child := range f.Fields {
		children = append(children, applyDefaultCollationField(child, defaultCollation))
	}
	result.Fields = children
	return result
}
// isStringLikeFieldType reports whether t is one of the field types that
// receive a default collation: STRING, JSON or GEOGRAPHY. The comparison
// is case-sensitive.
func isStringLikeFieldType(t string) bool {
	return t == "STRING" || t == "JSON" || t == "GEOGRAPHY"
}
package bqtypes
// PolicyTagList is the policyTags sub-object on TableFieldSchema.
// Names is omitted from JSON when empty.
type PolicyTagList struct {
Names []string `json:"names,omitempty"`
}
// ExtractSchemaPolicyOverlay builds a reduced copy of a REST schema that
// keeps only the fields carrying gateway-owned metadata (policyTags,
// collation, defaultValueExpression) or containing such a sub-field, so
// the gateway metadata store can round-trip column ACLs without
// shadowing engine-owned column types. It returns nil when the schema is
// nil, empty, or no field qualifies.
func ExtractSchemaPolicyOverlay(s *TableSchema) *TableSchema {
	if s == nil || len(s.Fields) == 0 {
		return nil
	}
	if kept := extractPolicyFields(s.Fields); len(kept) > 0 {
		return &TableSchema{Fields: kept}
	}
	return nil
}
// extractPolicyFields walks fields recursively and returns, in order,
// a stripped copy of each field that has at least one policy tag name, a
// collation, a default value expression, or a qualifying sub-field. The
// copy carries only the name, those three attributes and the filtered
// sub-fields. The result is never nil, only possibly empty.
func extractPolicyFields(fields []TableFieldSchema) []TableFieldSchema {
	kept := make([]TableFieldSchema, 0, len(fields))
	for _, f := range fields {
		children := extractPolicyFields(f.Fields)
		tagged := f.PolicyTags != nil && len(f.PolicyTags.Names) > 0
		if !tagged && f.Collation == "" && f.DefaultValueExpression == "" && len(children) == 0 {
			continue
		}
		kept = append(kept, TableFieldSchema{
			Name:                   f.Name,
			Collation:              f.Collation,
			PolicyTags:             f.PolicyTags,
			DefaultValueExpression: f.DefaultValueExpression,
			Fields:                 children,
		})
	}
	return kept
}
// MergeSchemaPolicyTags overlays cached policyTags (plus collation and
// default value expressions) onto the engine schema returned by
// tables.get. A nil base yields the overlay itself; a nil or empty
// overlay yields base unchanged. Otherwise a shallow copy of base with
// merged fields is returned.
func MergeSchemaPolicyTags(base, overlay *TableSchema) *TableSchema {
	switch {
	case base == nil:
		return overlay
	case overlay == nil || len(overlay.Fields) == 0:
		return base
	}
	result := *base
	result.Fields = mergeFieldPolicyTags(base.Fields, overlay.Fields)
	return &result
}
// mergeFieldPolicyTags merges overlay fields into base fields by name.
// With no base fields the overlay is returned as-is. Otherwise each base
// field is copied and, where an overlay field of the same name exists,
// takes over its non-nil policy tags, non-empty collation and non-empty
// default value expression; sub-fields are merged recursively. Overlay
// fields whose name is absent from base are appended after the base
// fields. When overlay names repeat, the last entry is used for merging.
func mergeFieldPolicyTags(base, overlay []TableFieldSchema) []TableFieldSchema {
	if len(base) == 0 {
		return overlay
	}
	overlayByName := make(map[string]TableFieldSchema, len(overlay))
	for _, ov := range overlay {
		overlayByName[ov.Name] = ov
	}

	merged := make([]TableFieldSchema, len(base))
	copy(merged, base)
	for i := range merged {
		cur := &merged[i]
		ov, found := overlayByName[cur.Name]
		if !found {
			continue
		}
		if ov.PolicyTags != nil {
			cur.PolicyTags = ov.PolicyTags
		}
		if ov.Collation != "" {
			cur.Collation = ov.Collation
		}
		if ov.DefaultValueExpression != "" {
			cur.DefaultValueExpression = ov.DefaultValueExpression
		}
		if len(cur.Fields) > 0 || len(ov.Fields) > 0 {
			cur.Fields = mergeFieldPolicyTags(cur.Fields, ov.Fields)
		}
	}

	// Overlay-only fields are kept so their metadata is not lost.
	for _, ov := range overlay {
		if _, inBase := indexFieldByName(base, ov.Name); !inBase {
			merged = append(merged, ov)
		}
	}
	return merged
}
// indexFieldByName returns the first field whose Name equals name and
// true, or a zero TableFieldSchema and false when there is none.
func indexFieldByName(fields []TableFieldSchema, name string) (TableFieldSchema, bool) {
	for i := range fields {
		if fields[i].Name == name {
			return fields[i], true
		}
	}
	return TableFieldSchema{}, false
}
package bqtypes
import (
"encoding/json"
"fmt"
)
// SQLTypeKind is StandardSqlDataType.typeKind on the wire. Gapic v2 REST
// may send the enum as a string ("INT64") or as a numeric proto enum (2).
type SQLTypeKind string

// sqlTypeKindFromNumeric maps a numeric StandardSqlDataType.TypeKind
// enum value to its string name, following the BigQuery v2 numbering
// (which is sparse: e.g. 2 = INT64, 16 = ARRAY, 29 = RANGE). The boolean
// is false for 0 (unspecified) and any value without a known name.
func sqlTypeKindFromNumeric(n int) (SQLTypeKind, bool) {
	switch n {
	case 2:
		return "INT64", true
	case 5:
		return "BOOL", true
	case 7:
		return "FLOAT64", true
	case 8:
		return "STRING", true
	case 9:
		return "BYTES", true
	case 10:
		return "DATE", true
	case 16:
		return "ARRAY", true
	case 17:
		return "STRUCT", true
	case 19:
		return "TIMESTAMP", true
	case 20:
		return "TIME", true
	case 21:
		return "DATETIME", true
	case 22:
		return "GEOGRAPHY", true
	case 23:
		return "NUMERIC", true
	case 24:
		return "BIGNUMERIC", true
	case 25:
		return "JSON", true
	case 26:
		return "INTERVAL", true
	case 29:
		// RANGE pairs with StandardSqlDataType.RangeElementType.
		return "RANGE", true
	default:
		return "", false
	}
}
// UnmarshalJSON accepts string enum names or numeric gapic v2 values.
// A JSON null clears the value. Strings are stored verbatim without
// validation; numbers are converted to int and must map to a known type
// kind, otherwise an error is returned. Any other JSON type is rejected.
func (t *SQLTypeKind) UnmarshalJSON(data []byte) error {
	if string(data) == jsonNullLiteral {
		*t = ""
		return nil
	}
	var decoded any
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	if name, isString := decoded.(string); isString {
		*t = SQLTypeKind(name)
		return nil
	}
	num, isNumber := decoded.(float64)
	if !isNumber {
		return fmt.Errorf("bqtypes: typeKind must be string or number, got %T", decoded)
	}
	code := int(num)
	kind, known := sqlTypeKindFromNumeric(code)
	if !known {
		return fmt.Errorf("bqtypes: unknown typeKind enum value %d", code)
	}
	*t = kind
	return nil
}
// Package bqtypes contains wire-compatible Go structs for the small slice
// of the BigQuery v2 REST API the emulator currently understands.
//
// We do not re-generate these from the official Discovery doc yet; this
// hand-written subset is enough to compile and exercise the route table.
// As we flesh out handlers, types here can be replaced by generated code
// (e.g. via `google.golang.org/api/bigquery/v2`'s generated structs) or
// expanded inline.
package bqtypes
import (
"encoding/json"
"fmt"
"strconv"
)
// labelsWireState is populated by Dataset/Table UnmarshalJSON when the body
// carries an explicit labels field (including label-delete null values).
type labelsWireState struct {
// present is set when the decoded body contained a labels field.
present bool
// delete holds the label keys reported by LabelsToDelete.
delete []string
}
// collationWireState records whether a decoded Dataset/Table body
// carried an explicit defaultCollation field. It is set by their
// UnmarshalJSON methods and read by DefaultCollationPresent.
type collationWireState struct {
present bool
}
// DatasetReference is a stable handle to a dataset. Both IDs are always
// serialized (no `omitempty`).
type DatasetReference struct {
ProjectID string `json:"projectId"`
DatasetID string `json:"datasetId"`
}
// TableReference is a stable handle to a table. All three IDs are always
// serialized (no `omitempty`).
type TableReference struct {
ProjectID string `json:"projectId"`
DatasetID string `json:"datasetId"`
TableID string `json:"tableId"`
}
// JobReference is a stable handle to a job. ProjectID and JobID are
// always serialized; Location is omitted when empty.
type JobReference struct {
ProjectID string `json:"projectId"`
JobID string `json:"jobId"`
Location string `json:"location,omitempty"`
}
// Dataset is the BigQuery Dataset resource (subset).
//
// Access is the dataset ACL — a list of role bindings. The field is
// always serialized (no `omitempty`) because the Java BigQuery client
// calls `new ArrayList<>(dataset.getAcl())` on the deserialized
// response, which NPEs when the field is null. Live BigQuery returns
// an empty array for newly-created datasets; the emulator must
// preserve that shape so AuthorizeDatasetIT-style ACL-mutation flows
// work end-to-end. See the failing-IT inventory in
// `docs/ENGINE_POLICY.md`.
//
// Labels is always serialized (no `omitempty`) for the same reason:
// the Node `getDatasetLabels` sample (and several upstream Python
// snippets) call `Object.entries(dataset.metadata.labels)` /
// `dict(dataset.labels)` on the deserialized response, which raises
// `TypeError: Cannot convert undefined or null to object` /
// `TypeError: argument of type 'NoneType' is not iterable` when the
// field is missing. Live BigQuery returns `labels: {}` for a newly
// created dataset; the resource builder defaults a nil map to `{}` to
// match. Same for Table.Labels below.
//
// Dataset has custom UnmarshalJSON / MarshalJSON methods; the trailing
// unexported fields (and DefaultCollationSet) are bookkeeping for those
// methods and never appear on the wire themselves.
type Dataset struct {
Kind string `json:"kind,omitempty"` // bigquery#dataset
ID string `json:"id,omitempty"`
DatasetReference DatasetReference `json:"datasetReference"`
FriendlyName string `json:"friendlyName,omitempty"`
Description string `json:"description,omitempty"`
Location string `json:"location,omitempty"`
Etag string `json:"etag,omitempty"`
CreationTime string `json:"creationTime,omitempty"`
LastModifiedTime string `json:"lastModifiedTime,omitempty"`
Access []map[string]any `json:"access"`
Labels ResourceLabels `json:"labels"`
DefaultTableExpirationMs string `json:"defaultTableExpirationMs,omitempty"`
// DefaultPartitionExpirationMs is inherited by new time-partitioned
// tables in the dataset. See
// docs/bigquery/docs/reference/rest/v2/datasets/get.md.
DefaultPartitionExpirationMs string `json:"defaultPartitionExpirationMs,omitempty"`
// DefaultCollation is BigQuery's per-dataset default text
// collation (typically `und:ci` for the case-insensitive lane
// the upstream node sample exercises). The emulator does not
// honor it at query time today, but the value still has to
// round-trip through GET/PATCH so client libraries observe the
// shape they expect. See
// docs/bigquery/docs/reference/rest/v2/datasets/get.md.
DefaultCollation string `json:"defaultCollation,omitempty"`
// DefaultRoundingMode is inherited by new NUMERIC/BIGNUMERIC columns in
// tables created in this dataset. Round-trips via the gateway overlay.
DefaultRoundingMode string `json:"defaultRoundingMode,omitempty"`
// MaxTimeTravelHours is the dataset time-travel window (48–168 hours).
MaxTimeTravelHours string `json:"maxTimeTravelHours,omitempty"`
// IsCaseInsensitive marks dataset/table name lookups as case-insensitive.
IsCaseInsensitive *bool `json:"isCaseInsensitive,omitempty"`
// ResourceTags are GCP resource manager tags attached to the dataset.
ResourceTags map[string]string `json:"resourceTags,omitempty"`
// Replicas echoes cross-region replica references supplied on write;
// the emulator does not model active replication.
Replicas []TableReference `json:"replicas,omitempty"`
// ExternalDatasetReference marks a Spanner / Cloud SQL external dataset.
ExternalDatasetReference *ExternalDatasetReference `json:"externalDatasetReference,omitempty"`
// labelsWire is filled by UnmarshalJSON when the body carried a labels
// field; read through LabelsPatchPresent and LabelsToDelete.
labelsWire labelsWireState `json:"-"`
// defaultCollationWire.present is set by UnmarshalJSON when the body
// carried a defaultCollation field; read through DefaultCollationPresent.
defaultCollationWire collationWireState `json:"-"`
// DefaultCollationSet makes MarshalJSON inject the defaultCollation
// field explicitly instead of relying on `omitempty`. UnmarshalJSON
// sets it whenever the body carried defaultCollation.
DefaultCollationSet bool `json:"-"`
// omitEmptyLabelsOnWire makes MarshalJSON drop the labels field when
// Labels is empty; set through SetOmitEmptyLabelsOnWire.
omitEmptyLabelsOnWire bool `json:"-"`
}
// LabelsPatchPresent reports whether a decoded request body explicitly set labels.
// The flag is recorded by Dataset.UnmarshalJSON.
func (d Dataset) LabelsPatchPresent() bool {
return d.labelsWire.present
}
// LabelsToDelete returns label keys cleared via JSON null in the request body.
// The slice is the one recorded by Dataset.UnmarshalJSON (not a copy) and
// is nil when no labels field was decoded.
func (d Dataset) LabelsToDelete() []string {
return d.labelsWire.delete
}
// DefaultCollationPresent reports whether the request body explicitly set
// defaultCollation (including empty string to clear). The flag is
// recorded by Dataset.UnmarshalJSON.
func (d Dataset) DefaultCollationPresent() bool {
return d.defaultCollationWire.present
}
// SetOmitEmptyLabelsOnWire omits the labels JSON field on PATCH responses
// when empty so Node deleteLabel* samples log `undefined` for apiResponse.labels.
// It only stores the flag; Dataset.MarshalJSON applies it.
func (d *Dataset) SetOmitEmptyLabelsOnWire(v bool) {
d.omitEmptyLabelsOnWire = v
}
// UnmarshalJSON accepts labels values of JSON null (label delete) and the
// usual string map entries client libraries send on datasets.patch. It
// also records whether labels and defaultCollation appeared in the body
// so LabelsPatchPresent, LabelsToDelete and DefaultCollationPresent can
// report it. The receiver is overwritten as a whole.
func (d *Dataset) UnmarshalJSON(data []byte) error {
// alias has Dataset's fields but none of its methods, so decoding into
// it does not re-enter this UnmarshalJSON.
type alias Dataset
// Labels and DefaultCollation are redeclared next to the embedded alias.
// Being at a shallower depth they win over the embedded fields with the
// same JSON names, so encoding/json stores the raw bytes here instead of
// decoding into the typed fields.
var raw struct {
alias
Labels json.RawMessage `json:"labels,omitempty"`
DefaultCollation json.RawMessage `json:"defaultCollation,omitempty"`
}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Replaces every field of the receiver, including the unexported
// bookkeeping fields, before they are re-derived below.
*d = Dataset(raw.alias)
// parseLabelsJSON is defined elsewhere; its result exposes present,
// values and delete, which are consumed here.
patch, err := parseLabelsJSON(raw.Labels)
if err != nil {
return err
}
if patch.present {
d.Labels = ResourceLabels(patch.values)
d.labelsWire = labelsWireState{present: true, delete: patch.delete}
}
// A non-nil raw message means the key appeared in the body.
if raw.DefaultCollation != nil {
d.defaultCollationWire.present = true
d.DefaultCollationSet = true
if err := json.Unmarshal(raw.DefaultCollation, &d.DefaultCollation); err != nil {
return fmt.Errorf("defaultCollation: %w", err)
}
}
return nil
}
// MarshalJSON emits labels:{} by default; omits labels when empty after a
// label-delete PATCH so Node clients surface apiResponse.labels as undefined.
// When DefaultCollationSet is true the defaultCollation field is injected
// explicitly into the encoded object, so the field is not left to
// `omitempty` alone.
func (d Dataset) MarshalJSON() ([]byte, error) {
	// alias drops the methods so json.Marshal does not recurse into here.
	type alias Dataset
	var (
		body []byte
		err  error
	)
	switch {
	case d.omitEmptyLabelsOnWire && len(d.Labels) == 0:
		body, err = marshalWithoutJSONField(alias(d), "labels")
	default:
		body, err = json.Marshal(alias(d))
	}
	if err != nil {
		return body, err
	}
	if !d.DefaultCollationSet {
		return body, nil
	}
	return injectJSONStringField(body, "defaultCollation", d.DefaultCollation)
}
// Table is the BigQuery Table resource (subset).
//
// Labels is always serialized (no `omitempty`); see the matching note
// on Dataset.Labels. tableResource defaults a nil map to `{}` so the
// upstream `getTableLabels` sample's `Object.entries(table.metadata.labels)`
// returns an empty iterator instead of erroring.
//
// Table has custom UnmarshalJSON / MarshalJSON methods; the trailing
// unexported fields (and DefaultCollationSet) are bookkeeping for those
// methods and never appear on the wire themselves.
type Table struct {
Kind string `json:"kind,omitempty"` // bigquery#table
ID string `json:"id,omitempty"`
TableReference TableReference `json:"tableReference"`
FriendlyName string `json:"friendlyName,omitempty"`
Description string `json:"description,omitempty"`
Schema *TableSchema `json:"schema,omitempty"`
Type string `json:"type,omitempty"` // TABLE | VIEW | EXTERNAL
NumRows string `json:"numRows,omitempty"`
NumBytes string `json:"numBytes,omitempty"`
// Output-only storage breakdown fields. The gateway stubs these to "0"
// until the engine exposes byte accounting RPCs.
NumLongTermBytes string `json:"numLongTermBytes,omitempty"`
NumActiveLogicalBytes string `json:"numActiveLogicalBytes,omitempty"`
NumTotalLogicalBytes string `json:"numTotalLogicalBytes,omitempty"`
NumCurrentPhysicalBytes string `json:"numCurrentPhysicalBytes,omitempty"`
NumPhysicalBytes string `json:"numPhysicalBytes,omitempty"`
NumActivePhysicalBytes string `json:"numActivePhysicalBytes,omitempty"`
NumLongTermPhysicalBytes string `json:"numLongTermPhysicalBytes,omitempty"`
NumTimeTravelPhysicalBytes string `json:"numTimeTravelPhysicalBytes,omitempty"`
CreationTime string `json:"creationTime,omitempty"`
LastModifiedTime string `json:"lastModifiedTime,omitempty"`
Etag string `json:"etag,omitempty"`
Labels ResourceLabels `json:"labels"`
// ExpirationTime is the wall-clock time at which the table
// expires, encoded as a decimal string of milliseconds since
// epoch -- BigQuery REST always serializes int64 timestamps
// as strings to dodge JavaScript's 53-bit integer ceiling.
// `omitempty` is intentional: live BigQuery omits the field
// when the table has no expiration.
ExpirationTime MillisTimestamp `json:"expirationTime,omitempty"`
// RangePartitioning is the integer-range partitioning spec
// (`{field, range:{start,end,interval}}`) the upstream node
// `createTableRangePartitioned` sample sets and the matching
// test asserts on the GET response.
RangePartitioning *RangePartitioning `json:"rangePartitioning,omitempty"`
// TimePartitioning is the (TIME / DAY / HOUR / MONTH / YEAR)
// time-based partitioning spec. Not exercised by every test
// but parallel to RangePartitioning so the roundtrip helper
// can carry it without dropping the field on the floor.
TimePartitioning *TimePartitioning `json:"timePartitioning,omitempty"`
// Clustering is the per-table clustering spec the upstream
// node `createTableClustered` sample sets via
// `{ fields: ['city', 'zipcode'] }`.
Clustering *Clustering `json:"clustering,omitempty"`
// DefaultCollation is the table-level default text collation
// (typically `und:ci`). Mirrors Dataset.DefaultCollation;
// see that field's comment for the round-trip rationale.
DefaultCollation string `json:"defaultCollation,omitempty"`
// Location is the BigQuery region for the table (inherited from
// its dataset on live BigQuery). Client libraries such as
// google-cloud-bigquery and BigFrames read this on tables.get.
Location string `json:"location,omitempty"`
// RequirePartitionFilter mirrors the table-level partition-filter
// requirement BigQuery REST exposes. Pointer semantics let PATCH
// bodies set `false` explicitly without conflating unset and false.
RequirePartitionFilter *bool `json:"requirePartitionFilter,omitempty"`
// View holds the view definition when Type is VIEW.
View *ViewDefinition `json:"view,omitempty"`
// MaterializedView holds the MV definition when Type is
// MATERIALIZED_VIEW. The query is analyzed at insert time to
// infer the catalog schema when the client omits an explicit
// TableSchema (see QueryMaterializedViewIT).
MaterializedView *MaterializedViewDefinition `json:"materializedView,omitempty"`
// ExternalDataConfiguration describes a table backed by data
// outside the emulator catalog (GCS CSV/JSON/Parquet, ...).
// Persisted in the gateway MetadataStore and materialized into
// the engine catalog at insert/query time for supported formats.
ExternalDataConfiguration *ExternalDataConfiguration `json:"externalDataConfiguration,omitempty"`
// EncryptionConfiguration stores the opaque CMEK kmsKeyName the
// client supplied on create/load/update. Not enforced by the emulator.
EncryptionConfiguration *EncryptionConfiguration `json:"encryptionConfiguration,omitempty"`
// DefaultRoundingMode is inherited by new NUMERIC/BIGNUMERIC columns.
DefaultRoundingMode string `json:"defaultRoundingMode,omitempty"`
// CaseInsensitive marks table name lookups as case-insensitive within
// a case-insensitive dataset.
CaseInsensitive *bool `json:"caseInsensitive,omitempty"`
// ResourceTags are GCP resource manager tags attached to the table.
ResourceTags map[string]string `json:"resourceTags,omitempty"`
// TableConstraints carries primary/foreign key metadata (not enforced).
TableConstraints *TableConstraints `json:"tableConstraints,omitempty"`
// Replicas echoes cross-region replica references on write.
Replicas []TableReference `json:"replicas,omitempty"`
// BiglakeConfiguration marks a BigLake-managed table (unsupported).
BiglakeConfiguration *BiglakeConfiguration `json:"biglakeConfiguration,omitempty"`
// ObjectTableOptions marks an object table (unsupported).
ObjectTableOptions *ObjectTableOptions `json:"objectTableOptions,omitempty"`
// labelsWire is filled by UnmarshalJSON when the body carried a labels
// field; read through LabelsPatchPresent and LabelsToDelete.
labelsWire labelsWireState `json:"-"`
// defaultCollationWire.present is set by UnmarshalJSON when the body
// carried a defaultCollation field; read through DefaultCollationPresent.
defaultCollationWire collationWireState `json:"-"`
// DefaultCollationSet makes MarshalJSON inject the defaultCollation
// field explicitly instead of relying on `omitempty`. UnmarshalJSON
// sets it whenever the body carried defaultCollation.
DefaultCollationSet bool `json:"-"`
// omitEmptyLabelsOnWire makes MarshalJSON drop the labels field when
// Labels is empty; set through SetOmitEmptyLabelsOnWire.
omitEmptyLabelsOnWire bool `json:"-"`
}
// LabelsPatchPresent reports whether a decoded request body explicitly set labels.
// The flag is recorded by Table.UnmarshalJSON.
func (t Table) LabelsPatchPresent() bool {
return t.labelsWire.present
}
// LabelsToDelete returns label keys cleared via JSON null in the request body.
// The slice is the one recorded by Table.UnmarshalJSON (not a copy) and
// is nil when no labels field was decoded.
func (t Table) LabelsToDelete() []string {
return t.labelsWire.delete
}
// DefaultCollationPresent reports whether the request body explicitly set
// defaultCollation (including empty string to clear). The flag is
// recorded by Table.UnmarshalJSON.
func (t Table) DefaultCollationPresent() bool {
return t.defaultCollationWire.present
}
// SetOmitEmptyLabelsOnWire omits the labels JSON field on PATCH responses
// when empty so Node deleteLabel* samples log `undefined` for apiResponse.labels.
// It only stores the flag; Table.MarshalJSON applies it.
func (t *Table) SetOmitEmptyLabelsOnWire(v bool) {
t.omitEmptyLabelsOnWire = v
}
// UnmarshalJSON accepts expirationTime as a decimal string or JSON number and
// labels values of JSON null (label delete). It also records whether
// labels and defaultCollation appeared in the body so LabelsPatchPresent,
// LabelsToDelete and DefaultCollationPresent can report it. The receiver
// is overwritten as a whole.
func (t *Table) UnmarshalJSON(data []byte) error {
// alias has Table's fields but none of its methods, so decoding into it
// does not re-enter this UnmarshalJSON.
type alias Table
// The three fields redeclared next to the embedded alias sit at a
// shallower depth and therefore win over the embedded fields with the
// same JSON names, so encoding/json stores the raw bytes here instead of
// decoding into the typed fields.
var raw struct {
alias
ExpirationTime json.RawMessage `json:"expirationTime,omitempty"`
Labels json.RawMessage `json:"labels,omitempty"`
DefaultCollation json.RawMessage `json:"defaultCollation,omitempty"`
}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Replaces every field of the receiver, including the unexported
// bookkeeping fields, before they are re-derived below.
*t = Table(raw.alias)
if raw.ExpirationTime != nil {
// Decoding is delegated to MillisTimestamp (declared elsewhere).
var ts MillisTimestamp
if err := json.Unmarshal(raw.ExpirationTime, &ts); err != nil {
return fmt.Errorf("expirationTime: %w", err)
}
t.ExpirationTime = ts
}
// parseLabelsJSON is defined elsewhere; its result exposes present,
// values and delete, which are consumed here.
patch, err := parseLabelsJSON(raw.Labels)
if err != nil {
return err
}
if patch.present {
t.Labels = ResourceLabels(patch.values)
t.labelsWire = labelsWireState{present: true, delete: patch.delete}
}
// A non-nil raw message means the key appeared in the body.
if raw.DefaultCollation != nil {
t.defaultCollationWire.present = true
t.DefaultCollationSet = true
if err := json.Unmarshal(raw.DefaultCollation, &t.DefaultCollation); err != nil {
return fmt.Errorf("defaultCollation: %w", err)
}
}
return nil
}
// MarshalJSON emits labels:{} by default; omits labels when empty after a
// label-delete PATCH so Node clients surface apiResponse.labels as undefined.
// When DefaultCollationSet is true the defaultCollation field is injected
// explicitly into the encoded object, so the field is not left to
// `omitempty` alone.
func (t Table) MarshalJSON() ([]byte, error) {
	// alias drops the methods so json.Marshal does not recurse into here.
	type alias Table
	var (
		body []byte
		err  error
	)
	switch {
	case t.omitEmptyLabelsOnWire && len(t.Labels) == 0:
		body, err = marshalWithoutJSONField(alias(t), "labels")
	default:
		body, err = json.Marshal(alias(t))
	}
	if err != nil {
		return body, err
	}
	if !t.DefaultCollationSet {
		return body, nil
	}
	return injectJSONStringField(body, "defaultCollation", t.DefaultCollation)
}
// ExternalDataConfiguration mirrors the BigQuery REST external data
// source object. See docs/bigquery/docs/reference/rest/v2/tables.md.
// Every member is optional on the wire and omitted when empty, so a
// false flag or a zero MaxBadRecords is never serialized.
type ExternalDataConfiguration struct {
SourceURIs []string `json:"sourceUris,omitempty"`
SourceFormat string `json:"sourceFormat,omitempty"`
Autodetect bool `json:"autodetect,omitempty"`
Schema *TableSchema `json:"schema,omitempty"`
// Format-specific option blocks; each is nil unless supplied.
CsvOptions *CsvOptions `json:"csvOptions,omitempty"`
GoogleSheetsOptions *GoogleSheetsOptions `json:"googleSheetsOptions,omitempty"`
HivePartitioningOptions *HivePartitioningOptions `json:"hivePartitioningOptions,omitempty"`
IgnoreUnknownValues bool `json:"ignoreUnknownValues,omitempty"`
MaxBadRecords int `json:"maxBadRecords,omitempty"`
Compression string `json:"compression,omitempty"`
}
// HivePartitioningOptions mirrors the BigQuery REST hivePartitioningOptions
// object. See docs/bigquery/docs/reference/rest/v2/tables.md.
//
// All fields are tagged omitempty and therefore vanish from the encoded
// JSON when they hold their zero value.
type HivePartitioningOptions struct {
Mode string `json:"mode,omitempty"`
// SourceURIPrefix is encoded under the REST key "sourceUriPrefix".
SourceURIPrefix string `json:"sourceUriPrefix,omitempty"`
RequirePartitionFilter bool `json:"requirePartitionFilter,omitempty"`
Fields []string `json:"fields,omitempty"`
}
// CsvOptions is the csvOptions sub-object of ExternalDataConfiguration.
//
// skipLeadingRows is unexported because clients send it either as a JSON
// number or as a decimal string; UnmarshalJSON normalizes both forms and
// SkipLeadingRows exposes the result. Being unexported, the field is not
// written by the default encoding/json encoder.
type CsvOptions struct {
	FieldDelimiter      string `json:"fieldDelimiter,omitempty"`
	Quote               string `json:"quote,omitempty"`
	Encoding            string `json:"encoding,omitempty"`
	AllowJaggedRows     bool   `json:"allowJaggedRows,omitempty"`
	AllowQuotedNewlines bool   `json:"allowQuotedNewlines,omitempty"`
	skipLeadingRows     int
}

// SkipLeadingRows returns the number of leading CSV rows to skip.
// It is safe to call on a nil receiver, which reports 0.
func (o *CsvOptions) SkipLeadingRows() int {
	if o == nil {
		return 0
	}
	return o.skipLeadingRows
}

// UnmarshalJSON accepts skipLeadingRows as JSON number or decimal string.
//
// A missing or null skipLeadingRows leaves the count at 0. A JSON number
// must be a whole number representable as int; fractional values such as
// 1.5 are rejected instead of being silently truncated. Any other JSON
// type is an error.
func (o *CsvOptions) UnmarshalJSON(data []byte) error {
	// alias drops CsvOptions' methods so the nested json.Unmarshal does
	// not recurse into this function.
	type alias CsvOptions
	var raw struct {
		alias
		SkipLeadingRows any `json:"skipLeadingRows,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}
	*o = CsvOptions(raw.alias)

	switch v := raw.SkipLeadingRows.(type) {
	case nil:
		// Field absent or explicitly null.
	case float64:
		n := int(v)
		// The round trip fails for fractional and out-of-range numbers.
		if float64(n) != v {
			return fmt.Errorf("csvOptions.skipLeadingRows: %v is not an integer", v)
		}
		o.skipLeadingRows = n
	case string:
		n, err := strconv.Atoi(v)
		if err != nil {
			return fmt.Errorf("csvOptions.skipLeadingRows: %w", err)
		}
		o.skipLeadingRows = n
	default:
		return fmt.Errorf("csvOptions.skipLeadingRows: unsupported type %T", v)
	}
	return nil
}
// GoogleSheetsOptions is the googleSheetsOptions sub-object.
//
// skipLeadingRows is unexported because clients send it either as a JSON
// number or as a decimal string; UnmarshalJSON normalizes both forms and
// SkipLeadingRows exposes the result. Being unexported, the field is not
// written by the default encoding/json encoder.
type GoogleSheetsOptions struct {
	Range           string `json:"range,omitempty"`
	skipLeadingRows int
}

// SkipLeadingRows returns the number of leading sheet rows to skip.
// It is safe to call on a nil receiver, which reports 0.
func (o *GoogleSheetsOptions) SkipLeadingRows() int {
	if o == nil {
		return 0
	}
	return o.skipLeadingRows
}

// UnmarshalJSON accepts skipLeadingRows as JSON number or decimal string.
//
// A missing or null skipLeadingRows leaves the count at 0. A JSON number
// must be a whole number representable as int; fractional values such as
// 1.5 are rejected instead of being silently truncated. Any other JSON
// type is an error.
func (o *GoogleSheetsOptions) UnmarshalJSON(data []byte) error {
	// alias drops the type's methods so the nested json.Unmarshal does
	// not recurse into this function.
	type alias GoogleSheetsOptions
	var raw struct {
		alias
		SkipLeadingRows any `json:"skipLeadingRows,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}
	*o = GoogleSheetsOptions(raw.alias)

	switch v := raw.SkipLeadingRows.(type) {
	case nil:
		// Field absent or explicitly null.
	case float64:
		n := int(v)
		// The round trip fails for fractional and out-of-range numbers.
		if float64(n) != v {
			return fmt.Errorf("googleSheetsOptions.skipLeadingRows: %v is not an integer", v)
		}
		o.skipLeadingRows = n
	case string:
		n, err := strconv.Atoi(v)
		if err != nil {
			return fmt.Errorf("googleSheetsOptions.skipLeadingRows: %w", err)
		}
		o.skipLeadingRows = n
	default:
		return fmt.Errorf("googleSheetsOptions.skipLeadingRows: unsupported type %T", v)
	}
	return nil
}
// TableConstraints mirrors the BigQuery REST tableConstraints object.
// Only the primary key is modelled by this struct.
type TableConstraints struct {
// PrimaryKey is left out of the encoded JSON when nil.
PrimaryKey *PrimaryKey `json:"primaryKey,omitempty"`
}
// PrimaryKey is the primaryKey sub-object of TableConstraints.
type PrimaryKey struct {
// Columns is left out of the encoded JSON when empty.
Columns []string `json:"columns,omitempty"`
}
// ViewDefinition is the BigQuery REST view sub-object. See
// docs/bigquery/docs/reference/rest/v2/tables#ViewDefinition.
type ViewDefinition struct {
// Query is encoded under the REST key "query".
Query string `json:"query,omitempty"`
// UseLegacySQL is encoded as "useLegacySql"; false is omitted.
UseLegacySQL bool `json:"useLegacySql,omitempty"`
}
// MaterializedViewDefinition is the BigQuery REST materializedView
// sub-object. See docs/bigquery/docs/reference/rest/v2/tables#MaterializedViewDefinition.
// Only the query text is modelled by this struct.
type MaterializedViewDefinition struct {
Query string `json:"query,omitempty"`
}
// TimePartitioning describes time-based partitioning. Carried for
// roundtrip only; the emulator does not enforce partition expiration.
type TimePartitioning struct {
Type string `json:"type,omitempty"`
Field string `json:"field,omitempty"`
// ExpirationMs is kept as a string, so it is stored and re-emitted
// exactly as received.
ExpirationMs string `json:"expirationMs,omitempty"`
}
// Clustering is the per-table clustering spec.
type Clustering struct {
// Fields is left out of the encoded JSON when empty.
Fields []string `json:"fields,omitempty"`
}
// TableSchema is the BigQuery TableSchema resource.
type TableSchema struct {
// Fields holds the top-level columns; nested columns hang off
// TableFieldSchema.Fields.
Fields []TableFieldSchema `json:"fields,omitempty"`
}
// TableFieldSchema is one column in a TableSchema.
//
// Name and Type are always encoded (no omitempty); every other field is
// dropped from the JSON when it holds its zero value.
type TableFieldSchema struct {
Name string `json:"name"`
Type string `json:"type"` // STRING, INT64, FLOAT64, BOOL, TIMESTAMP, ...
Mode string `json:"mode,omitempty"` // NULLABLE, REQUIRED, REPEATED
Description string `json:"description,omitempty"`
DefaultValueExpression string `json:"defaultValueExpression,omitempty"`
Collation string `json:"collation,omitempty"`
PolicyTags *PolicyTagList `json:"policyTags,omitempty"`
// MaskKind is an emulator extension for column-level data masking
// (NULLIFY | SHA256 | DEFAULT_VALUE | DENIED). BigQuery clients
// ignore unknown JSON fields; the gateway persists this via
// SetColumnGovernance on tables.insert/patch/update.
MaskKind string `json:"maskKind,omitempty"`
// Fields makes the type recursive: it carries the sub-columns of a
// nested column.
Fields []TableFieldSchema `json:"fields,omitempty"` // for STRUCT/RECORD
}
// ExternalDatasetReference links a dataset to an external Spanner / Cloud SQL source.
// Both fields are plain strings and are omitted from the JSON when empty.
type ExternalDatasetReference struct {
Connection string `json:"connection,omitempty"`
Source string `json:"source,omitempty"`
}
// BiglakeConfiguration marks a BigLake-managed table.
// All fields are plain strings and are omitted from the JSON when empty.
type BiglakeConfiguration struct {
// ConnectionID is encoded under the REST key "connectionId".
ConnectionID string `json:"connectionId,omitempty"`
// StorageURI is encoded under the REST key "storageUri".
StorageURI string `json:"storageUri,omitempty"`
FileFormat string `json:"fileFormat,omitempty"`
TableFormat string `json:"tableFormat,omitempty"`
}
// ObjectTableOptions marks an object table over GCS object metadata.
type ObjectTableOptions struct {
// SourceURIs is encoded under the REST key "sourceUris" and omitted
// when empty.
SourceURIs []string `json:"sourceUris,omitempty"`
}
package bqtypes
import (
"errors"
"strconv"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// ValueToCell converts a `bigquery_emulator.v1.Cell` from the engine
// gRPC contract into the BigQuery REST `f`/`v` wire shape emitted by
// `jobs.query`, `jobs.getQueryResults`, and `tabledata.list`.
//
// Scalar formatting is the C++ engine's job: it serializes every
// `googlesql::Value` into the per-TypeKind string form documented at
// docs/bigquery/docs/reference/rest/v2/StandardSqlDataType.md (mirrored
// in docs/REST_API.md "Type wire encoding"), for example:
//
//   - `INT64`, `NUMERIC`, `BIGNUMERIC` -> decimal string
//   - `FLOAT64`   -> decimal string, or "NaN" / "Infinity" / "-Infinity"
//   - `BOOL`      -> "true" or "false"
//   - `STRING`    -> raw string
//   - `BYTES`     -> base64 string (RFC 4648 section 4)
//   - `DATE`      -> "YYYY-MM-DD"
//   - `TIME`      -> "HH:MM:SS.ffffff"
//   - `DATETIME`  -> "YYYY-MM-DD HH:MM:SS.ffffff"
//   - `TIMESTAMP` -> RFC 3339 with mandatory `Z`, microsecond precision
//   - `GEOGRAPHY` -> WKT string
//   - `JSON`      -> string-encoded JSON
//
// This function therefore only rewraps structure:
//
//   - a string value becomes a Cell whose `v` is that string
//   - `ARRAY` becomes a Cell whose `v` is a list of Cells
//   - `STRUCT` becomes a Cell whose `v` is a Row ({"f": [{"v": ...}, ...]})
//   - NULL, a nil input cell, and any unrecognized value kind become a
//     Cell whose `v` is nil (JSON null)
//
// STRUCT uses a positional Row instead of a JSON object so it survives
// `tabledata.list`, which disallows duplicate field names. ARRAY
// elements are Cells themselves, so ARRAY-of-STRUCT, ARRAY-of-ARRAY and
// NULL elements all marshal the same way.
func ValueToCell(c *enginepb.Cell) Cell {
	if c == nil {
		return Cell{V: nil}
	}
	switch payload := c.GetValue().(type) {
	case *enginepb.Cell_StringValue:
		return Cell{V: payload.StringValue}
	case *enginepb.Cell_Array:
		src := payload.Array.GetElements()
		converted := make([]Cell, len(src))
		for i := range src {
			converted[i] = ValueToCell(src[i])
		}
		return Cell{V: converted}
	case *enginepb.Cell_StructValue:
		src := payload.StructValue.GetFields()
		converted := make([]Cell, len(src))
		for i := range src {
			converted[i] = ValueToCell(src[i])
		}
		return Cell{V: Row{F: converted}}
	}
	// Explicit NULL cells and value kinds this gateway does not know.
	return Cell{V: nil}
}
// CellsToRow turns one engine row (a flat slice of cells) into the
// top-level `f`/`v` Row that BigQuery REST clients expect. Each cell is
// converted with ValueToCell, so a STRUCT column ends up as a Cell whose
// `v` is itself a Row. The result always has a non-nil F, even for an
// empty input.
func CellsToRow(cells []*enginepb.Cell) Row {
	converted := make([]Cell, len(cells))
	for i := range cells {
		converted[i] = ValueToCell(cells[i])
	}
	return Row{F: converted}
}
// WireFormatOptions controls optional REST wire-shape adjustments for
// tabledata.list and query results.
type WireFormatOptions struct {
// UseInt64Timestamp makes encodeCellForField emit TIMESTAMP values as
// int64 epoch microseconds instead of decimal strings.
UseInt64Timestamp bool
}
// CellsToRowForSchema behaves like CellsToRow and additionally re-encodes
// TIMESTAMP values as microseconds since the Unix epoch, which is what
// google-cloud-bigquery's query-result parser
// (`CELL_DATA_PARSER.timestamp_to_py`) expects instead of the
// human-readable strings the engine emits.
//
// Cells are matched to schema fields by position. A nil schema, or a
// cell beyond the last schema field, is passed through unchanged. Only
// the first element of opts is consulted; with none, the zero
// WireFormatOptions applies.
func CellsToRowForSchema(
	cells []*enginepb.Cell,
	schema *enginepb.TableSchema,
	opts ...WireFormatOptions,
) Row {
	var format WireFormatOptions
	if len(opts) > 0 {
		format = opts[0]
	}

	var fields []*enginepb.FieldSchema
	if schema != nil {
		fields = schema.GetFields()
	}

	encoded := make([]Cell, len(cells))
	for i := range cells {
		var field *enginepb.FieldSchema
		if i < len(fields) {
			field = fields[i]
		}
		encoded[i] = encodeCellForField(ValueToCell(cells[i]), field, format)
	}
	return Row{F: encoded}
}
// encodeCellForField rewrites cell according to the schema field that
// describes it.
//
//   - NULL cells and cells without a schema field are returned as-is.
//   - "ARRAY<...>" fields: every element is re-encoded against the
//     element schema from arrayElementFieldSchema.
//   - "TIMESTAMP" fields: a blank string becomes NULL; a parseable value
//     becomes epoch microseconds (int64 when format.UseInt64Timestamp is
//     set, a decimal string otherwise); anything unparseable is kept.
//   - "STRUCT" / "RECORD" fields: sub-cells are re-encoded positionally
//     against the field's sub-fields.
//   - Every other type is returned unchanged.
//
// A cell whose Go value does not have the shape the field type implies
// is returned untouched.
func encodeCellForField(cell Cell, field *enginepb.FieldSchema, format WireFormatOptions) Cell {
	if field == nil || cell.V == nil {
		return cell
	}

	fieldType := field.GetType()
	switch {
	case strings.HasPrefix(fieldType, "ARRAY<"):
		elements, isList := cell.V.([]Cell)
		if !isList {
			return cell
		}
		elemField := arrayElementFieldSchema(field)
		encoded := make([]Cell, 0, len(elements))
		for _, el := range elements {
			encoded = append(encoded, encodeCellForField(el, elemField, format))
		}
		return Cell{V: encoded}

	case fieldType == "TIMESTAMP":
		text, isString := cell.V.(string)
		if !isString {
			return cell
		}
		if strings.TrimSpace(text) == "" {
			return Cell{V: nil}
		}
		micros, err := TimestampStringToMicros(text)
		if err != nil {
			return cell
		}
		if format.UseInt64Timestamp {
			// Fall back to the string form if the digits do not fit int64.
			if n, parseErr := strconv.ParseInt(micros, 10, 64); parseErr == nil {
				return Cell{V: n}
			}
		}
		return Cell{V: micros}

	case fieldType == "STRUCT" || fieldType == "RECORD":
		row, isRow := cell.V.(Row)
		if !isRow {
			return cell
		}
		subFields := field.GetFields()
		encoded := make([]Cell, 0, len(row.F))
		for i, subCell := range row.F {
			var subField *enginepb.FieldSchema
			if i < len(subFields) {
				subField = subFields[i]
			}
			encoded = append(encoded, encodeCellForField(subCell, subField, format))
		}
		return Cell{V: Row{F: encoded}}
	}
	return cell
}
// arrayElementFieldSchema derives the schema of one array element from
// an "ARRAY<T>" field: it returns a fresh FieldSchema whose Type is T
// (the text after the "ARRAY<" prefix, minus one trailing ">"). Only the
// type string is carried over. A field whose type does not start with
// "ARRAY<" is returned unchanged.
func arrayElementFieldSchema(field *enginepb.FieldSchema) *enginepb.FieldSchema {
	const prefix = "ARRAY<"

	typ := field.GetType()
	if !strings.HasPrefix(typ, prefix) {
		return field
	}
	elemType := strings.TrimSuffix(typ[len(prefix):], ">")
	return &enginepb.FieldSchema{Type: elemType}
}
// TimestampStringToMicros parses an engine TIMESTAMP wire string and
// returns the BigQuery REST query-result encoding: decimal microseconds
// since 1970-01-01 UTC.
//
// Input that is already a decimal integer, optionally with one leading
// minus sign for instants before the epoch, is returned unchanged apart
// from whitespace trimming. Anything else must match one of the layouts
// accepted by parseTimestampWireString. An empty or unparseable string
// yields an error and an empty result.
func TimestampStringToMicros(s string) (string, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return "", errors.New("empty timestamp")
	}
	// Storage Read and some engine paths already emit epoch micros as
	// decimal digits. Pre-1970 instants are negative, so one leading "-"
	// is allowed in front of the digits; a bare "-" still falls through
	// to the parser and is rejected there.
	if isDecimalIntString(strings.TrimPrefix(s, "-")) {
		return s, nil
	}
	t, err := parseTimestampWireString(s)
	if err != nil {
		return "", err
	}
	utc := t.UTC()
	// Unix() rounds toward negative infinity and Nanosecond() is never
	// negative, so the sum is also correct for pre-epoch instants.
	micros := utc.Unix()*1_000_000 + int64(utc.Nanosecond()/1000)
	return strconv.FormatInt(micros, 10), nil
}

// parseTimestampWireString parses the textual timestamp forms the engine
// produces. The input is normalized first (the first "T" becomes a
// space, "+00:00" and "Z" become "+00") and then tried against
// "YYYY-MM-DD HH:MM:SS[.ffffff]" layouts with and without a "+HH"
// offset. Values without an offset are read as UTC. The error from the
// last layout attempted is returned when none matches.
func parseTimestampWireString(s string) (time.Time, error) {
	s = strings.TrimSpace(s)
	s = strings.Replace(s, "T", " ", 1)
	s = strings.Replace(s, "+00:00", "+00", 1)
	s = strings.Replace(s, "Z", "+00", 1)
	layouts := []string{
		"2006-01-02 15:04:05.999999-07",
		"2006-01-02 15:04:05-07",
		"2006-01-02 15:04:05.999999",
		"2006-01-02 15:04:05",
	}
	var lastErr error
	for _, layout := range layouts {
		t, err := time.Parse(layout, s)
		if err == nil {
			return t, nil
		}
		lastErr = err
	}
	return time.Time{}, lastErr
}

// isDecimalIntString reports whether s is non-empty and consists solely
// of the ASCII digits 0-9. Signs, spaces and other characters make it
// return false.
func isDecimalIntString(s string) bool {
	if s == "" {
		return false
	}
	for _, r := range s {
		if r < '0' || r > '9' {
			return false
		}
	}
	return true
}
package bqtypes
import (
"bytes"
"encoding/json"
"fmt"
"maps"
"strconv"
)
// injectJSONStringField forces a top-level string field onto an encoded object.
//
// raw must be a JSON object or the literal null. null is treated as an
// empty object, so the result then contains only the injected field. An
// existing key of the same name is overwritten. The object is re-encoded
// from a map, so keys in the result are sorted. Input that is neither an
// object nor null yields the decode error.
func injectJSONStringField(raw []byte, key, value string) ([]byte, error) {
	var doc map[string]json.RawMessage
	if err := json.Unmarshal(raw, &doc); err != nil {
		return nil, err
	}
	if doc == nil {
		// JSON null decodes to a nil map; writing to it would panic.
		doc = make(map[string]json.RawMessage, 1)
	}
	encoded, err := json.Marshal(value)
	if err != nil {
		return nil, err
	}
	doc[key] = encoded
	return json.Marshal(doc)
}
// marshalWithoutJSONField encodes v as JSON and removes the top-level
// key dropField from the result. v must encode to a JSON object (or
// null); the object is rebuilt from a map, so its keys come out sorted.
// Removing a key that is not present is a no-op.
func marshalWithoutJSONField(v any, dropField string) ([]byte, error) {
	encoded, err := json.Marshal(v)
	if err != nil {
		return nil, err
	}

	var members map[string]json.RawMessage
	if decodeErr := json.Unmarshal(encoded, &members); decodeErr != nil {
		return nil, decodeErr
	}
	delete(members, dropField)

	return json.Marshal(members)
}
// ResourceLabels is the labels map carried by Dataset and Table
// resources.
//
// Decoding treats a JSON null value as "delete this label" and simply
// leaves the key out (the upstream Node `deleteLabelDataset` sample sends
// `{color: null}` via setMetadata). Encoding a nil map produces `{}`, not
// `null`, so client libraries that call `Object.entries(resource.labels)`
// never see a missing field.
type ResourceLabels map[string]string

// MarshalJSON implements json.Marshaler.
func (l ResourceLabels) MarshalJSON() ([]byte, error) {
	if l != nil {
		return json.Marshal(map[string]string(l))
	}
	return []byte("{}"), nil
}

// UnmarshalJSON implements json.Unmarshaler. It replaces *l with a
// freshly built map; keys whose value is null are skipped and any
// non-string value is reported as an error naming the offending key.
func (l *ResourceLabels) UnmarshalJSON(data []byte) error {
	var entries map[string]json.RawMessage
	if err := json.Unmarshal(data, &entries); err != nil {
		return err
	}

	labels := make(ResourceLabels, len(entries))
	for name, encoded := range entries {
		if string(bytes.TrimSpace(encoded)) == "null" {
			continue
		}
		var value string
		if err := json.Unmarshal(encoded, &value); err != nil {
			return fmt.Errorf("labels[%q]: %w", name, err)
		}
		labels[name] = value
	}

	*l = labels
	return nil
}
// MillisTimestamp holds a BigQuery REST int64 "milliseconds since epoch"
// value in its wire form, a decimal string. Decoding is lenient and also
// accepts a bare JSON number, because the Node client sometimes posts
// expirationTime as a number on tables.insert.
type MillisTimestamp string

// String returns the canonical decimal string form.
func (t MillisTimestamp) String() string {
	return string(t)
}

// UnmarshalJSON implements json.Unmarshaler.
//
// null resets the value to "". A JSON string is stored as-is, without
// validating its contents. Any other input must be a JSON number that
// fits int64 and is stored as its base-10 representation.
func (t *MillisTimestamp) UnmarshalJSON(data []byte) error {
	trimmed := bytes.TrimSpace(data)

	switch {
	case string(trimmed) == "null":
		*t = ""
		return nil
	case len(trimmed) > 0 && trimmed[0] == '"':
		var text string
		if err := json.Unmarshal(trimmed, &text); err != nil {
			return err
		}
		*t = MillisTimestamp(text)
		return nil
	}

	var num json.Number
	if err := json.Unmarshal(trimmed, &num); err != nil {
		return fmt.Errorf("millis timestamp: %w", err)
	}
	millis, err := num.Int64()
	if err != nil {
		return fmt.Errorf("millis timestamp: %w", err)
	}
	*t = MillisTimestamp(strconv.FormatInt(millis, 10))
	return nil
}
// labelsPatch is the decoded form of a "labels" member in a request
// body.
type labelsPatch struct {
	// values holds the labels that carried a string value.
	values map[string]string
	// delete lists the keys whose value was JSON null. Its order follows
	// map iteration and is therefore unspecified.
	delete []string
	// present is true when the request contained a labels member at all.
	present bool
}

// parseLabelsJSON decodes the raw "labels" member of a request.
//
// A nil input means the member was absent and yields the zero patch.
// Otherwise present is set, string values land in values, and keys
// mapped to null are collected in delete. A non-object document or a
// non-string, non-null value is an error.
func parseLabelsJSON(data json.RawMessage) (labelsPatch, error) {
	if data == nil {
		return labelsPatch{}, nil
	}

	result := labelsPatch{present: true}
	var entries map[string]json.RawMessage
	if err := json.Unmarshal(data, &entries); err != nil {
		return result, err
	}

	result.values = make(map[string]string, len(entries))
	for name, encoded := range entries {
		if string(bytes.TrimSpace(encoded)) == "null" {
			result.delete = append(result.delete, name)
			continue
		}
		var text string
		if err := json.Unmarshal(encoded, &text); err != nil {
			return result, fmt.Errorf("labels[%q]: %w", name, err)
		}
		result.values[name] = text
	}
	return result, nil
}
// ApplyLabelsPatch applies an explicit labels update to base and returns
// the resulting map.
//
// When present is false the request did not mention labels and base is
// returned untouched (the very same map). Otherwise a new map is built:
// it starts as a copy of base, loses every key in deleteKeys (the
// JSON-null deletions), and then receives values, which win over both
// base and deletions. base itself is never modified.
func ApplyLabelsPatch(
	base map[string]string,
	present bool,
	values map[string]string,
	deleteKeys []string,
) map[string]string {
	if present {
		merged := make(map[string]string, len(base)+len(values))
		maps.Copy(merged, base)
		for i := range deleteKeys {
			delete(merged, deleteKeys[i])
		}
		maps.Copy(merged, values)
		return merged
	}
	return base
}
// UnmarshalWriteDisposition decodes a writeDisposition value that may
// arrive either as a JSON string or as a one-element array of strings
// (the upstream `relaxColumnQueryAppend` sample posts writeDisposition
// as ['WRITE_APPEND']).
//
// An empty input or JSON null yields "" with no error. An array must
// contain exactly one string; any other length is an error.
func UnmarshalWriteDisposition(raw json.RawMessage) (string, error) {
	trimmed := bytes.TrimSpace(raw)
	if len(raw) == 0 || string(trimmed) == "null" {
		return "", nil
	}

	if len(trimmed) > 0 && trimmed[0] == '[' {
		var list []string
		if err := json.Unmarshal(trimmed, &list); err != nil {
			return "", fmt.Errorf("writeDisposition: %w", err)
		}
		if len(list) != 1 {
			return "", fmt.Errorf("writeDisposition: want single-element array, got %d elements", len(list))
		}
		return list[0], nil
	}

	var single string
	if err := json.Unmarshal(trimmed, &single); err != nil {
		return "", fmt.Errorf("writeDisposition: %w", err)
	}
	return single, nil
}
// Package copy implements synchronous BigQuery COPY jobs.
package copy
import (
"context"
"errors"
"fmt"
"io"
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
"github.com/vantaboard/bigquery-emulator/gateway/snapshots"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
const (
	// Write dispositions understood by the copy job.
	writeTruncate = "WRITE_TRUNCATE"
	writeEmpty    = "WRITE_EMPTY"
	writeAppend   = "WRITE_APPEND"
	// createNever is the create disposition that requires the
	// destination table to exist already.
	createNever = "CREATE_NEVER"

	// OperationCopy is the default copy job operation (table-to-table copy).
	OperationCopy = "COPY"
	// OperationSnapshot creates a SNAPSHOT destination from a TABLE source.
	OperationSnapshot = "SNAPSHOT"
	// OperationRestore creates a TABLE destination from a SNAPSHOT source.
	OperationRestore = "RESTORE"
	// OperationClone is accepted but treated like COPY (clone billing is N/A).
	OperationClone = "CLONE"
)

// NormalizeOperationType upper-cases and trims op. An empty value and
// OPERATION_TYPE_UNSPECIFIED both map to COPY; everything else is
// returned in its normalized spelling, whether or not it is supported.
func NormalizeOperationType(op string) string {
	normalized := strings.ToUpper(strings.TrimSpace(op))
	if normalized == "" || normalized == "OPERATION_TYPE_UNSPECIFIED" {
		return OperationCopy
	}
	return normalized
}

// validateOperationType accepts the four known operation types and
// returns an error for anything else. The comparison is exact, so
// callers are expected to pass an already normalized value.
func validateOperationType(op string) error {
	known := op == OperationCopy ||
		op == OperationSnapshot ||
		op == OperationRestore ||
		op == OperationClone
	if known {
		return nil
	}
	return fmt.Errorf("unsupported operationType %q", op)
}
// Result captures copy-job statistics. FormatStatistics renders both
// counters as decimal strings for the REST response.
type Result struct {
// CopiedRows is the row count reported for the job.
CopiedRows int64
// CopiedLogicalBytes is the byte count reported for the job.
CopiedLogicalBytes int64
}
// Execute runs a synchronous COPY job.
//
// It validates cfg (operation type, destination table ID, at least one
// source), fills in defaultProject wherever a project ID is missing,
// and then picks an execution strategy:
//
//  1. the engine SQL path when shouldUseSQLCopy reports a decorated
//     source that refers to a live table;
//  2. the catalog path when any source carries a snapshot decorator;
//  3. otherwise the SQL path if a query client is available, falling
//     back to the catalog path when the SQL attempt fails.
//
// An omitted writeDisposition defaults to WRITE_APPEND for multi-source
// copies and to WRITE_EMPTY for a single source. The zero Result is
// returned together with any error.
func Execute(ctx context.Context, catalog enginepb.CatalogClient, query enginepb.QueryClient,
snapStore *snapshots.Store, cfg *jobs.JobConfigurationCopy, defaultProject string,
) (Result, error) {
if cfg == nil {
return Result{}, errors.New("copy configuration is required")
}
op := NormalizeOperationType(cfg.OperationType)
if err := validateOperationType(op); err != nil {
return Result{}, err
}
if cfg.DestinationTable == nil || cfg.DestinationTable.TableID == "" {
return Result{}, errors.New("destinationTable.tableId is required")
}
sources := sourceRefs(cfg, defaultProject)
if len(sources) == 0 {
return Result{}, errors.New("sourceTable or sourceTables is required")
}
destProject := cfg.DestinationTable.ProjectID
if destProject == "" {
destProject = defaultProject
}
destDataset := cfg.DestinationTable.DatasetID
destTable := cfg.DestinationTable.TableID
wd := cfg.WriteDisposition
if wd == "" {
// Default depends on the number of sources.
if len(sources) > 1 {
wd = writeAppend
} else {
wd = writeEmpty
}
}
cd := cfg.CreateDisposition
// CREATE_NEVER needs an existing destination; fail before copying.
if err := checkCreateDisposition(ctx, catalog, cd, destProject, destDataset, destTable); err != nil {
return Result{}, err
}
if shouldUseSQLCopy(ctx, catalog, snapStore, query, sources) {
return executeSQLCopy(ctx, catalog, query, sources, destProject, destDataset, destTable, wd, cd)
}
if hasSnapshotSource(sources) {
return executeCatalogCopy(ctx, catalog, snapStore, sources, destProject, destDataset, destTable, wd)
}
if query != nil {
// Best effort: an error from the SQL path is deliberately dropped
// and the catalog path below is tried instead.
if result, err := executeSQLCopy(
ctx,
catalog,
query,
sources,
destProject,
destDataset,
destTable,
wd,
cd,
); err == nil {
return result, nil
}
}
return executeCatalogCopy(ctx, catalog, snapStore, sources, destProject, destDataset, destTable, wd)
}
// shouldUseSQLCopy reports whether the copy should go through the engine
// SQL path (FOR SYSTEM_TIME AS OF). That is the case when a query client
// is available and at least one decorated source names a table that
// currently exists in the catalog. Decorated sources that the snapshot
// store can resolve are skipped here; they belong to deleted tables and
// are served by the catalog copy instead.
func shouldUseSQLCopy(ctx context.Context, catalog enginepb.CatalogClient,
	snapStore *snapshots.Store, query enginepb.QueryClient,
	sources []bqtypes.TableReference,
) bool {
	if query == nil {
		return false
	}
	for i := range sources {
		src := sources[i]
		base, epoch, decorated := snapshots.ParseDecorator(src.TableID)
		if !decorated {
			continue
		}
		if snapStore != nil {
			_, err := snapStore.ResolveAtEpoch(src.ProjectID, src.DatasetID, base, epoch)
			if err == nil {
				// The snapshot store owns this one.
				continue
			}
		}
		live := &enginepb.TableRef{
			ProjectId: src.ProjectID,
			DatasetId: src.DatasetID,
			TableId:   base,
		}
		if tableExists(ctx, catalog, live) {
			return true
		}
	}
	return false
}
// checkCreateDisposition enforces CREATE_NEVER: with that disposition
// the destination table must already exist, otherwise a gRPC NotFound
// status is returned. Every other disposition passes without a catalog
// lookup.
func checkCreateDisposition(ctx context.Context, catalog enginepb.CatalogClient,
	cd, projectID, datasetID, tableID string,
) error {
	if cd != createNever {
		return nil
	}
	dest := &enginepb.TableRef{ProjectId: projectID, DatasetId: datasetID, TableId: tableID}
	if tableExists(ctx, catalog, dest) {
		return nil
	}
	msg := fmt.Sprintf("Not found: Table %s:%s.%s", projectID, datasetID, tableID)
	return status.Error(codes.NotFound, msg)
}
// sourceRefs returns the copy sources named by cfg with defaultProject
// filled in wherever a reference has no project ID. A non-empty
// SourceTables list takes precedence over the single SourceTable; nil is
// returned when neither is set. The references are copies, so cfg itself
// is left untouched.
func sourceRefs(cfg *jobs.JobConfigurationCopy, defaultProject string) []bqtypes.TableReference {
	withProject := func(ref bqtypes.TableReference) bqtypes.TableReference {
		if ref.ProjectID == "" {
			ref.ProjectID = defaultProject
		}
		return ref
	}

	switch {
	case len(cfg.SourceTables) > 0:
		refs := make([]bqtypes.TableReference, 0, len(cfg.SourceTables))
		for _, src := range cfg.SourceTables {
			refs = append(refs, withProject(src))
		}
		return refs
	case cfg.SourceTable != nil:
		return []bqtypes.TableReference{withProject(*cfg.SourceTable)}
	}
	return nil
}
// hasSnapshotSource reports whether at least one reference has a table
// ID that snapshots.ParseDecorator recognizes as decorated.
func hasSnapshotSource(refs []bqtypes.TableReference) bool {
	for i := range refs {
		_, _, decorated := snapshots.ParseDecorator(refs[i].TableID)
		if decorated {
			return true
		}
	}
	return false
}
// executeSQLCopy performs the copy by running one SQL statement (see
// buildCopySQL) on the engine, with destProject as the request project.
// The result stream is drained and discarded. The returned statistics
// come from countDestinationRows, so they describe the destination
// table after the statement has run.
//
// With CREATE_NEVER the destination must already exist, otherwise a gRPC
// NotFound status is returned before any SQL is issued.
func executeSQLCopy(ctx context.Context, catalog enginepb.CatalogClient, query enginepb.QueryClient,
	sources []bqtypes.TableReference, destProject, destDataset, destTable, wd, cd string,
) (Result, error) {
	if cd == createNever {
		dest := &enginepb.TableRef{ProjectId: destProject, DatasetId: destDataset, TableId: destTable}
		if !tableExists(ctx, catalog, dest) {
			msg := fmt.Sprintf("Not found: Table %s:%s.%s", destProject, destDataset, destTable)
			return Result{}, status.Error(codes.NotFound, msg)
		}
	}

	statement, err := buildCopySQL(sources, destDataset, destTable, wd)
	if err != nil {
		return Result{}, err
	}

	request := &enginepb.QueryRequest{ProjectId: destProject, Sql: statement}
	stream, err := query.ExecuteQuery(ctx, request)
	if err != nil {
		return Result{}, err
	}
	// Read the stream to its end so the statement is known to have
	// completed; the messages themselves are not needed.
	for {
		if _, recvErr := stream.Recv(); recvErr != nil {
			if errors.Is(recvErr, io.EOF) {
				break
			}
			return Result{}, recvErr
		}
	}
	return countDestinationRows(ctx, catalog, destProject, destDataset, destTable)
}
// buildCopySQL renders the SQL statement that implements a copy.
//
// Each source becomes "SELECT * FROM `dataset`.`table`", with a
// "FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(epoch)" suffix when the table
// ID carries a decorator; multiple sources are joined with UNION ALL.
// The write disposition picks the statement form: WRITE_TRUNCATE ->
// CREATE OR REPLACE TABLE, WRITE_EMPTY -> CREATE TABLE, WRITE_APPEND ->
// INSERT INTO. Any other disposition is an error.
//
// NOTE(review): table names are qualified by dataset only; a source's
// project ID is not part of the generated SQL.
func buildCopySQL(sources []bqtypes.TableReference, destDataset, destTable, wd string) (string, error) {
	parts := make([]string, len(sources))
	for i := range sources {
		base, epoch, decorated := snapshots.ParseDecorator(sources[i].TableID)
		table := fmt.Sprintf("%s.%s", quoteIdent(sources[i].DatasetID), quoteIdent(base))
		if decorated {
			table = fmt.Sprintf("%s FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)",
				table, epoch)
		}
		parts[i] = "SELECT * FROM " + table
	}
	selectAll := strings.Join(parts, " UNION ALL ")
	target := fmt.Sprintf("%s.%s", quoteIdent(destDataset), quoteIdent(destTable))

	var format string
	switch wd {
	case writeTruncate:
		format = "CREATE OR REPLACE TABLE %s AS %s"
	case writeEmpty:
		format = "CREATE TABLE %s AS %s"
	case writeAppend:
		format = "INSERT INTO %s %s"
	default:
		return "", fmt.Errorf("unsupported writeDisposition %q", wd)
	}
	return fmt.Sprintf(format, target, selectAll), nil
}
// quoteIdent wraps id in backticks so it can be used as a quoted
// identifier in generated GoogleSQL.
//
// Quoted identifiers use the same escape sequences as string literals,
// so a backslash is written as `\\` and a backtick as "\`". Doubling the
// backtick is not a valid escape and would end the identifier early.
func quoteIdent(id string) string {
	// Escape backslashes first; otherwise the backslashes added for the
	// backticks would themselves be doubled.
	escaped := strings.ReplaceAll(id, `\`, `\\`)
	escaped = strings.ReplaceAll(escaped, "`", "\\`")
	return "`" + escaped + "`"
}
// executeCatalogCopy performs the copy through catalog RPCs instead of
// SQL: it reads every source fully into memory, checks that all sources
// share a compatible schema, prepares the destination according to wd,
// and inserts the merged rows.
//
// CopiedRows is the count reported by InsertRows; CopiedLogicalBytes is
// the estimateRowBytes total over the source rows.
func executeCatalogCopy(ctx context.Context, catalog enginepb.CatalogClient, snapStore *snapshots.Store,
sources []bqtypes.TableReference, destProject, destDataset, destTable, wd string,
) (Result, error) {
var mergedSchema *enginepb.TableSchema
var mergedRows []*enginepb.DataRow
var totalBytes int64
for _, src := range sources {
schema, rows, err := readSource(ctx, catalog, snapStore, src)
if err != nil {
return Result{}, err
}
// The first source fixes the schema; later ones must match it.
if mergedSchema == nil {
mergedSchema = schema
} else if !schemasCompatible(mergedSchema, schema) {
return Result{}, errors.New("source tables must have identical schemas for multi-source copy")
}
mergedRows = append(mergedRows, rows...)
totalBytes += estimateRowBytes(rows)
}
if mergedSchema == nil {
return Result{}, errors.New("could not resolve source table schema")
}
// Make sure the destination dataset exists before touching the table.
if err := ensureDataset(ctx, catalog, destProject, destDataset); err != nil {
return Result{}, err
}
// Drops, creates or keeps the destination table depending on wd.
if err := applyWriteDisposition(ctx, catalog, destProject, destDataset, destTable, mergedSchema, wd); err != nil {
return Result{}, err
}
ref := seed.TableRef{ProjectID: destProject, DatasetID: destDataset, TableID: destTable}
applier := seed.NewCatalogApplier(catalog)
rowMaps := protoRowsToMaps(mergedSchema, mergedRows)
inserted, err := applier.InsertRows(ctx, ref, mergedSchema, rowMaps)
if err != nil {
return Result{}, err
}
return Result{
CopiedRows: int64(inserted),
CopiedLogicalBytes: totalBytes,
}, nil
}
// readSource loads the schema and all rows of one copy source.
//
// A decorated table ID (see snapshots.ParseDecorator) is resolved
// through snapStore at the decorator's epoch. Without a snapshot store
// such a source cannot be served, so an error is returned rather than
// dereferencing a nil store. An undecorated ID is described and listed
// through the catalog.
func readSource(ctx context.Context, catalog enginepb.CatalogClient, snapStore *snapshots.Store,
	ref bqtypes.TableReference,
) (*enginepb.TableSchema, []*enginepb.DataRow, error) {
	base, epoch, decorated := snapshots.ParseDecorator(ref.TableID)
	if decorated {
		// Same nil guard as shouldUseSQLCopy: the store is optional.
		if snapStore == nil {
			return nil, nil, fmt.Errorf("source table %s.%s.%s: snapshot store is not configured",
				ref.ProjectID, ref.DatasetID, ref.TableID)
		}
		entry, err := snapStore.ResolveAtEpoch(ref.ProjectID, ref.DatasetID, base, epoch)
		if err != nil {
			return nil, nil, err
		}
		return entry.Schema, entry.Rows, nil
	}
	tableRef := &enginepb.TableRef{
		ProjectId: ref.ProjectID,
		DatasetId: ref.DatasetID,
		TableId:   base,
	}
	desc, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
	if err != nil {
		return nil, nil, fmt.Errorf("source table %s.%s.%s: %w",
			ref.ProjectID, ref.DatasetID, base, err)
	}
	rows, err := listAllRows(ctx, catalog, tableRef)
	if err != nil {
		return nil, nil, err
	}
	return desc.GetSchema(), rows, nil
}
// listAllRows reads every row of ref through catalog.ListRows, asking
// for pages of 10,000 rows. Paging stops when a page comes back empty or
// when the running offset reaches the total row count reported by the
// server. All rows are accumulated in memory.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef,
) ([]*enginepb.DataRow, error) {
	const pageSize = 10_000

	var (
		collected []*enginepb.DataRow
		offset    int64
	)
	for {
		resp, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
			Table:      ref,
			StartIndex: offset,
			MaxResults: pageSize,
		})
		if err != nil {
			return nil, err
		}
		batch := resp.GetRows()
		if len(batch) == 0 {
			return collected, nil
		}
		collected = append(collected, batch...)
		offset += int64(len(batch))
		if offset >= resp.GetTotalRows() {
			return collected, nil
		}
	}
}
// protoRowsToMaps converts engine rows into one map per row, keyed by
// column name. Cells are matched to schema fields by position; a field
// with no corresponding cell is left out of that row's map, and cells
// beyond the last schema field are ignored.
func protoRowsToMaps(schema *enginepb.TableSchema, rows []*enginepb.DataRow) []map[string]any {
	fields := schema.GetFields()
	converted := make([]map[string]any, len(rows))
	for r := range rows {
		cells := rows[r].GetCells()
		record := make(map[string]any, len(fields))
		for i := 0; i < len(fields) && i < len(cells); i++ {
			record[fields[i].GetName()] = cellToAny(cells[i])
		}
		converted[r] = record
	}
	return converted
}
// cellToAny returns the cell's string value, or nil for a nil cell and
// for a cell flagged as NULL.
//
// NOTE(review): only the string payload is read, so ARRAY and STRUCT
// cells presumably come back as "" - confirm whether catalog copies
// need to preserve nested values.
func cellToAny(c *enginepb.Cell) any {
	if c != nil && !c.GetNullValue() {
		return c.GetStringValue()
	}
	return nil
}
// estimateRowBytes approximates the logical size of rows as the summed
// length of every non-nil cell's string value.
func estimateRowBytes(rows []*enginepb.DataRow) int64 {
	var total int64
	for r := range rows {
		cells := rows[r].GetCells()
		for i := range cells {
			if cells[i] == nil {
				continue
			}
			total += int64(len(cells[i].GetStringValue()))
		}
	}
	return total
}
// ensureDataset asks a seed catalog applier to ensure the dataset,
// passing "US" as the location argument. Only the error from that call
// is reported.
func ensureDataset(ctx context.Context, catalog enginepb.CatalogClient, projectID, datasetID string) error {
	_, err := seed.NewCatalogApplier(catalog).EnsureDataset(ctx, projectID, datasetID, "US")
	return err
}
// applyWriteDisposition prepares the destination table for a catalog
// copy:
//
//   - WRITE_TRUNCATE drops an existing table and registers it afresh.
//   - WRITE_EMPTY registers the table, or fails with a gRPC AlreadyExists
//     status when it is already there.
//   - Any other disposition keeps an existing table and registers a
//     missing one, tolerating an AlreadyExists answer from the catalog.
func applyWriteDisposition(ctx context.Context, catalog enginepb.CatalogClient,
	projectID, datasetID, tableID string, schema *enginepb.TableSchema, wd string,
) error {
	ref := &enginepb.TableRef{ProjectId: projectID, DatasetId: datasetID, TableId: tableID}
	exists := tableExists(ctx, catalog, ref)

	register := func() error {
		_, err := catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{Table: ref, Schema: schema})
		return err
	}

	switch {
	case wd == writeTruncate:
		if exists {
			if _, err := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: ref}); err != nil {
				return fmt.Errorf("WRITE_TRUNCATE drop: %w", err)
			}
		}
		return register()
	case wd == writeEmpty:
		if exists {
			msg := fmt.Sprintf("Already Exists: Table %s:%s.%s", projectID, datasetID, tableID)
			return status.Error(codes.AlreadyExists, msg)
		}
		return register()
	case exists:
		return nil
	}

	if err := register(); err != nil && status.Code(err) != codes.AlreadyExists {
		return err
	}
	return nil
}
// tableExists reports whether DescribeTable succeeds for ref. Any error,
// whatever its cause, counts as "does not exist".
func tableExists(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef) bool {
	if _, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref}); err != nil {
		return false
	}
	return true
}
// schemasCompatible reports whether two schemas have the same number of
// fields and, position by position, the same name, type and mode. Two
// nil schemas are compatible; a nil and a non-nil schema are not. Nested
// fields are not compared.
func schemasCompatible(a, b *enginepb.TableSchema) bool {
	if a == nil || b == nil {
		return a == b
	}
	left, right := a.GetFields(), b.GetFields()
	if len(left) != len(right) {
		return false
	}
	for i, lf := range left {
		rf := right[i]
		same := lf.GetName() == rf.GetName() &&
			lf.GetType() == rf.GetType() &&
			lf.GetMode() == rf.GetMode()
		if !same {
			return false
		}
	}
	return true
}
// countDestinationRows builds the Result for a SQL copy from the
// destination table's total row count, obtained with a single ListRows
// call (StartIndex 0, MaxResults 0).
//
// NOTE(review): CopiedLogicalBytes is set to the same number as
// CopiedRows, and the count covers the whole destination table rather
// than only the rows written by this job - confirm that both are
// intended.
func countDestinationRows(ctx context.Context, catalog enginepb.CatalogClient,
	projectID, datasetID, tableID string,
) (Result, error) {
	dest := &enginepb.TableRef{
		ProjectId: projectID,
		DatasetId: datasetID,
		TableId:   tableID,
	}
	resp, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
		Table:      dest,
		StartIndex: 0,
		MaxResults: 0,
	})
	if err != nil {
		return Result{}, err
	}
	total := resp.GetTotalRows()
	return Result{CopiedRows: total, CopiedLogicalBytes: total}, nil
}
// FormatStatistics converts a Result into the jobs.CopyStatistics wire
// form, rendering both counters as base-10 strings.
func FormatStatistics(r Result) *jobs.CopyStatistics {
	stats := new(jobs.CopyStatistics)
	stats.CopiedRows = strconv.FormatInt(r.CopiedRows, 10)
	stats.CopiedLogicalBytes = strconv.FormatInt(r.CopiedLogicalBytes, 10)
	return stats
}
// Package engine is the Go-side gRPC client for the BigQuery emulator's
// C++ engine.
//
// The gateway and engine are two separate processes that talk over an
// in-process gRPC channel (see proto/emulator.proto and
// gateway/enginepb). This package wraps the dial / health-probe / close
// dance so the gateway lifecycle code in gateway.go and the per-request
// HTTP handlers in gateway/handlers can share one connection.
//
// Client mirrors the way cloud-spanner-emulator's gateway connects to
// emulator_main: a single insecure loopback channel, one shared
// connection per gateway process, health checked via grpc.health.v1
// before any business RPCs are dispatched.
package engine
import (
"context"
"errors"
"fmt"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials/insecure"
healthpb "google.golang.org/grpc/health/grpc_health_v1"
"google.golang.org/grpc/status"
)
// Client is a thin facade around the *grpc.ClientConn that the gateway
// uses to talk to the C++ engine. It owns the connection so callers can
// share one channel across all handlers and only have to Close once at
// shutdown.
//
// Catalog and Query are the two business-logic clients defined in
// proto/emulator.proto; StorageRead and StorageWrite are the internal
// storage contracts the public bqstorage shim adapts. Health is the
// standard grpc.health.v1 probe the engine wires up via
// grpc::EnableDefaultHealthCheckService (see frontend/server/server.cc).
//
// Dial creates every service client on the same connection.
type Client struct {
// conn is the shared channel; Close releases it.
conn *grpc.ClientConn
Catalog enginepb.CatalogClient
Query enginepb.QueryClient
// SQLTools is the client for the engine's SqlTools service.
SQLTools enginepb.SqlToolsClient
StorageRead enginepb.StorageReadClient
StorageWrite enginepb.StorageWriteClient
Health healthpb.HealthClient
}
// Dial creates a gRPC channel to the engine at address (typically
// "host:port" on the loopback interface) and returns a Client with every
// service client bound to that channel. Transport credentials are
// insecure on purpose: the channel never leaves the local machine and
// the engine subprocess's port is a gateway-internal contract, not a
// public API.
//
// Dial does not wait for the engine to be ready. Call WaitForReady (or
// the gateway's own startup probe) before issuing business RPCs. The
// caller owns the returned Client and must Close it at shutdown. An
// empty address is rejected.
func Dial(address string) (*Client, error) {
	if len(address) == 0 {
		return nil, errors.New("engine: empty engine address")
	}

	creds := grpc.WithTransportCredentials(insecure.NewCredentials())
	conn, err := grpc.NewClient(address, creds)
	if err != nil {
		return nil, fmt.Errorf("engine: dial %s: %w", address, err)
	}

	client := &Client{conn: conn}
	client.Catalog = enginepb.NewCatalogClient(conn)
	client.Query = enginepb.NewQueryClient(conn)
	client.SQLTools = enginepb.NewSqlToolsClient(conn)
	client.StorageRead = enginepb.NewStorageReadClient(conn)
	client.StorageWrite = enginepb.NewStorageWriteClient(conn)
	client.Health = healthpb.NewHealthClient(conn)
	return client, nil
}
// Close releases the underlying gRPC channel. It is safe to call on a
// nil receiver (gateway constructed without an engine subprocess) and on
// a Client that has no connection.
//
// Close is idempotent. grpc-go's (*grpc.ClientConn).Close is not: a
// repeated call reports a Canceled status (ErrClientConnClosing). That
// status is translated to nil here so a second Close is a no-op, as
// callers of this method are promised. Any other error is returned
// unchanged.
func (c *Client) Close() error {
	if c == nil || c.conn == nil {
		return nil
	}
	err := c.conn.Close()
	// status.Code(nil) is codes.OK, so a successful first Close falls
	// through and returns nil below.
	if status.Code(err) == codes.Canceled {
		// The connection was already closed by an earlier call.
		return nil
	}
	return err
}
// healthRetryInterval is the gap between successive grpc.health.v1.Check
// probes inside WaitForReady (100ms). Tuned to keep the worst-case
// startup latency low (we expect the engine subprocess to bind its socket
// within a few hundred milliseconds) without burning CPU on tight
// retries.
const healthRetryInterval = 100 * time.Millisecond
// WaitForReady repeatedly issues grpc.health.v1.Health.Check for the
// empty service name until the engine answers SERVING. A SERVING reply
// means the engine has finished BuildAndStart and called
// SetServingStatus("", true) (see frontend/server/server.cc), which is
// the point at which business RPCs become safe to issue.
//
// The loop is bounded by ctx; callers typically pass a context with a
// 30s timeout (see gateway.waitForEngine). Between attempts it waits
// healthRetryInterval. Outcomes per attempt:
//
//   - SERVING: return nil.
//   - reachable but any other status: wait and probe again.
//   - error classified transient by isTransientHealthError: wait and
//     probe again.
//   - any other error (for example Unimplemented from an engine without
//     the health service): return it wrapped, immediately, instead of
//     waiting out the timeout.
//
// When ctx ends during the wait, the context error is returned wrapped
// (errors.Is still matches it). A nil receiver yields an error.
func (c *Client) WaitForReady(ctx context.Context) error {
	if c == nil {
		return errors.New("engine: nil client")
	}

	probe := &healthpb.HealthCheckRequest{Service: ""}
	for {
		// The RPC runs under the caller's ctx, so its deadline also caps
		// each individual Check.
		reply, checkErr := c.Health.Check(ctx, probe)
		if checkErr != nil {
			if !isTransientHealthError(checkErr) {
				return fmt.Errorf("engine: health check: %w", checkErr)
			}
			// Transient: the engine is still starting; fall through to
			// the wait below.
		} else if reply.GetStatus() == healthpb.HealthCheckResponse_SERVING {
			return nil
		}
		// Otherwise the engine answered with a non-SERVING status; keep
		// polling until it flips.

		select {
		case <-time.After(healthRetryInterval):
			// Interval elapsed; probe again.
		case <-ctx.Done():
			return fmt.Errorf("engine: wait for ready: %w", ctx.Err())
		}
	}
}
// isTransientHealthError reports whether err should be retried by the
// health-probe loop. A nil error is never transient. Otherwise the gRPC
// status code decides: Unavailable, DeadlineExceeded, Canceled and
// ResourceExhausted are retriable; every other code (Unimplemented,
// InvalidArgument, ...) is a real failure that should surface
// immediately.
func isTransientHealthError(err error) bool {
	if err == nil {
		return false
	}
	code := status.Code(err)
	return code == codes.Unavailable ||
		code == codes.DeadlineExceeded ||
		code == codes.Canceled ||
		code == codes.ResourceExhausted
}
// emulator.proto is the internal contract between the Go REST gateway and
// the C++ engine. It is intentionally minimal: the gateway owns the
// public-facing BigQuery REST shape, and only forwards the bits that
// actually need GoogleSQL to do their job.
//
// Code generation is wired up via buf (see ../buf.gen.yaml) for the Go
// side and via Bazel (see ./BUILD.bazel) for the C++ side. The Go
// stubs land in gateway/enginepb/ and are checked in so `go build`
// works without an extra codegen step; the C++ stubs are generated
// fresh into the Bazel output tree.
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.11
// protoc v7.35.0
// source: emulator.proto
package enginepb
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
// Verify that this generated code is sufficiently up-to-date.
const _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)

// Verify that runtime/protoimpl is sufficiently up-to-date.
const _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
// DatasetRef names a dataset by project ID and dataset ID. It is message
// index 0 of emulator.proto.
type DatasetRef struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	ProjectId     string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
	DatasetId     string                 `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DatasetRef and stores the message info
// for index 0 on its state.
func (x *DatasetRef) Reset() {
	*x = DatasetRef{}
	info := &file_emulator_proto_msgTypes[0]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DatasetRef) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DatasetRef) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DatasetRef) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[0]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {0}.
//
// Deprecated: Use DatasetRef.ProtoReflect.Descriptor instead.
func (*DatasetRef) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{0}
}

// GetProjectId returns x.ProjectId, or "" when x is nil.
func (x *DatasetRef) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}

// GetDatasetId returns x.DatasetId, or "" when x is nil.
func (x *DatasetRef) GetDatasetId() string {
	if x == nil {
		return ""
	}
	return x.DatasetId
}
// TableRef names a table by project, dataset and table ID, with an
// optional table type. It is message index 1 of emulator.proto.
type TableRef struct {
	state     protoimpl.MessageState `protogen:"open.v1"`
	ProjectId string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
	DatasetId string                 `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
	TableId   string                 `protobuf:"bytes,3,opt,name=table_id,json=tableId,proto3" json:"table_id,omitempty"`
	// BigQuery REST `type` when known (e.g. VIEW). Empty defaults to TABLE.
	TableType     string `protobuf:"bytes,4,opt,name=table_type,json=tableType,proto3" json:"table_type,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero TableRef and stores the message info
// for index 1 on its state.
func (x *TableRef) Reset() {
	*x = TableRef{}
	info := &file_emulator_proto_msgTypes[1]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *TableRef) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*TableRef) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *TableRef) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[1]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {1}.
//
// Deprecated: Use TableRef.ProtoReflect.Descriptor instead.
func (*TableRef) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{1}
}

// GetProjectId returns x.ProjectId, or "" when x is nil.
func (x *TableRef) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}

// GetDatasetId returns x.DatasetId, or "" when x is nil.
func (x *TableRef) GetDatasetId() string {
	if x == nil {
		return ""
	}
	return x.DatasetId
}

// GetTableId returns x.TableId, or "" when x is nil.
func (x *TableRef) GetTableId() string {
	if x == nil {
		return ""
	}
	return x.TableId
}

// GetTableType returns x.TableType, or "" when x is nil.
func (x *TableRef) GetTableType() string {
	if x == nil {
		return ""
	}
	return x.TableType
}
// FieldSchema describes one column: name, type, mode, description and,
// for nested types, child fields. It is message index 2 of
// emulator.proto.
type FieldSchema struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	Name  string                 `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// BigQuery type names: STRING, INT64, FLOAT64, BOOL, TIMESTAMP, DATE,
	// TIME, DATETIME, GEOGRAPHY, BYTES, NUMERIC, BIGNUMERIC, JSON, STRUCT,
	// ARRAY, ...
	Type string `protobuf:"bytes,2,opt,name=type,proto3" json:"type,omitempty"`
	// NULLABLE | REQUIRED | REPEATED. Empty defaults to NULLABLE.
	Mode          string         `protobuf:"bytes,3,opt,name=mode,proto3" json:"mode,omitempty"`
	Description   string         `protobuf:"bytes,4,opt,name=description,proto3" json:"description,omitempty"`
	Fields        []*FieldSchema `protobuf:"bytes,5,rep,name=fields,proto3" json:"fields,omitempty"` // for STRUCT/RECORD.
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero FieldSchema and stores the message info
// for index 2 on its state.
func (x *FieldSchema) Reset() {
	*x = FieldSchema{}
	info := &file_emulator_proto_msgTypes[2]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *FieldSchema) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*FieldSchema) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *FieldSchema) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[2]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {2}.
//
// Deprecated: Use FieldSchema.ProtoReflect.Descriptor instead.
func (*FieldSchema) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{2}
}

// GetName returns x.Name, or "" when x is nil.
func (x *FieldSchema) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}

// GetType returns x.Type, or "" when x is nil.
func (x *FieldSchema) GetType() string {
	if x == nil {
		return ""
	}
	return x.Type
}

// GetMode returns x.Mode, or "" when x is nil.
func (x *FieldSchema) GetMode() string {
	if x == nil {
		return ""
	}
	return x.Mode
}

// GetDescription returns x.Description, or "" when x is nil.
func (x *FieldSchema) GetDescription() string {
	if x == nil {
		return ""
	}
	return x.Description
}

// GetFields returns x.Fields, or nil when x is nil.
func (x *FieldSchema) GetFields() []*FieldSchema {
	if x == nil {
		return nil
	}
	return x.Fields
}
// TableSchema is a list of FieldSchema entries. It is message index 3 of
// emulator.proto.
type TableSchema struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Fields        []*FieldSchema         `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero TableSchema and stores the message info
// for index 3 on its state.
func (x *TableSchema) Reset() {
	*x = TableSchema{}
	info := &file_emulator_proto_msgTypes[3]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *TableSchema) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*TableSchema) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *TableSchema) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[3]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {3}.
//
// Deprecated: Use TableSchema.ProtoReflect.Descriptor instead.
func (*TableSchema) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{3}
}

// GetFields returns x.Fields, or nil when x is nil.
func (x *TableSchema) GetFields() []*FieldSchema {
	if x == nil {
		return nil
	}
	return x.Fields
}
// RegisterDatasetRequest carries a dataset reference and a location
// string. It is message index 4 of emulator.proto.
type RegisterDatasetRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
	Location      string                 `protobuf:"bytes,2,opt,name=location,proto3" json:"location,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterDatasetRequest and stores the
// message info for index 4 on its state.
func (x *RegisterDatasetRequest) Reset() {
	*x = RegisterDatasetRequest{}
	info := &file_emulator_proto_msgTypes[4]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *RegisterDatasetRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*RegisterDatasetRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *RegisterDatasetRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[4]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {4}.
//
// Deprecated: Use RegisterDatasetRequest.ProtoReflect.Descriptor instead.
func (*RegisterDatasetRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{4}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *RegisterDatasetRequest) GetDataset() *DatasetRef {
	if x == nil {
		return nil
	}
	return x.Dataset
}

// GetLocation returns x.Location, or "" when x is nil.
func (x *RegisterDatasetRequest) GetLocation() string {
	if x == nil {
		return ""
	}
	return x.Location
}
// RegisterDatasetResponse has no payload fields. It is message index 5
// of emulator.proto.
type RegisterDatasetResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterDatasetResponse and stores the
// message info for index 5 on its state.
func (x *RegisterDatasetResponse) Reset() {
	*x = RegisterDatasetResponse{}
	info := &file_emulator_proto_msgTypes[5]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *RegisterDatasetResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*RegisterDatasetResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *RegisterDatasetResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[5]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {5}.
//
// Deprecated: Use RegisterDatasetResponse.ProtoReflect.Descriptor instead.
func (*RegisterDatasetResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{5}
}
// DropDatasetRequest carries the dataset to drop, a delete-contents flag
// and a REST metadata JSON snapshot. It is message index 6 of
// emulator.proto.
type DropDatasetRequest struct {
	state          protoimpl.MessageState `protogen:"open.v1"`
	Dataset        *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
	DeleteContents bool                   `protobuf:"varint,2,opt,name=delete_contents,json=deleteContents,proto3" json:"delete_contents,omitempty"`
	// JSON object snapshot of gateway REST-only dataset metadata (labels,
	// friendlyName, ...) captured at delete time for undelete round-trip.
	RestMetadataJson string `protobuf:"bytes,3,opt,name=rest_metadata_json,json=restMetadataJson,proto3" json:"rest_metadata_json,omitempty"`
	unknownFields    protoimpl.UnknownFields
	sizeCache        protoimpl.SizeCache
}

// Reset overwrites x with a zero DropDatasetRequest and stores the
// message info for index 6 on its state.
func (x *DropDatasetRequest) Reset() {
	*x = DropDatasetRequest{}
	info := &file_emulator_proto_msgTypes[6]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DropDatasetRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DropDatasetRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DropDatasetRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[6]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {6}.
//
// Deprecated: Use DropDatasetRequest.ProtoReflect.Descriptor instead.
func (*DropDatasetRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{6}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *DropDatasetRequest) GetDataset() *DatasetRef {
	if x == nil {
		return nil
	}
	return x.Dataset
}

// GetDeleteContents returns x.DeleteContents, or false when x is nil.
func (x *DropDatasetRequest) GetDeleteContents() bool {
	if x == nil {
		return false
	}
	return x.DeleteContents
}

// GetRestMetadataJson returns x.RestMetadataJson, or "" when x is nil.
func (x *DropDatasetRequest) GetRestMetadataJson() string {
	if x == nil {
		return ""
	}
	return x.RestMetadataJson
}
// DropDatasetResponse has no payload fields. It is message index 7 of
// emulator.proto.
type DropDatasetResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DropDatasetResponse and stores the
// message info for index 7 on its state.
func (x *DropDatasetResponse) Reset() {
	*x = DropDatasetResponse{}
	info := &file_emulator_proto_msgTypes[7]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DropDatasetResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DropDatasetResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DropDatasetResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[7]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {7}.
//
// Deprecated: Use DropDatasetResponse.ProtoReflect.Descriptor instead.
func (*DropDatasetResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{7}
}
// UndeleteDatasetRequest carries the dataset reference to restore. It is
// message index 8 of emulator.proto.
type UndeleteDatasetRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero UndeleteDatasetRequest and stores the
// message info for index 8 on its state.
func (x *UndeleteDatasetRequest) Reset() {
	*x = UndeleteDatasetRequest{}
	info := &file_emulator_proto_msgTypes[8]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *UndeleteDatasetRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*UndeleteDatasetRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *UndeleteDatasetRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[8]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {8}.
//
// Deprecated: Use UndeleteDatasetRequest.ProtoReflect.Descriptor instead.
func (*UndeleteDatasetRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{8}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *UndeleteDatasetRequest) GetDataset() *DatasetRef {
	if x == nil {
		return nil
	}
	return x.Dataset
}
// UndeleteDatasetResponse carries the restored REST metadata JSON. It is
// message index 9 of emulator.proto.
type UndeleteDatasetResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Restored gateway REST-only metadata JSON object from the dataset
	// tombstone sidecar (`restMetadata` in `_dataset.meta.json`).
	RestMetadataJson string `protobuf:"bytes,1,opt,name=rest_metadata_json,json=restMetadataJson,proto3" json:"rest_metadata_json,omitempty"`
	unknownFields    protoimpl.UnknownFields
	sizeCache        protoimpl.SizeCache
}

// Reset overwrites x with a zero UndeleteDatasetResponse and stores the
// message info for index 9 on its state.
func (x *UndeleteDatasetResponse) Reset() {
	*x = UndeleteDatasetResponse{}
	info := &file_emulator_proto_msgTypes[9]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *UndeleteDatasetResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*UndeleteDatasetResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *UndeleteDatasetResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[9]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {9}.
//
// Deprecated: Use UndeleteDatasetResponse.ProtoReflect.Descriptor instead.
func (*UndeleteDatasetResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{9}
}

// GetRestMetadataJson returns x.RestMetadataJson, or "" when x is nil.
func (x *UndeleteDatasetResponse) GetRestMetadataJson() string {
	if x == nil {
		return ""
	}
	return x.RestMetadataJson
}
// ListDatasetsRequest carries the project ID whose datasets are listed.
// It is message index 10 of emulator.proto.
type ListDatasetsRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	ProjectId     string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListDatasetsRequest and stores the
// message info for index 10 on its state.
func (x *ListDatasetsRequest) Reset() {
	*x = ListDatasetsRequest{}
	info := &file_emulator_proto_msgTypes[10]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *ListDatasetsRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*ListDatasetsRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *ListDatasetsRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[10]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {10}.
//
// Deprecated: Use ListDatasetsRequest.ProtoReflect.Descriptor instead.
func (*ListDatasetsRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{10}
}

// GetProjectId returns x.ProjectId, or "" when x is nil.
func (x *ListDatasetsRequest) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}
// ListDatasetsResponse carries the listed dataset references. It is
// message index 11 of emulator.proto.
type ListDatasetsResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Refs are returned in deterministic (lexicographic by dataset_id)
	// order so the gateway's listing is stable across calls. See
	// backend/storage/storage.h::Storage::ListDatasets.
	Datasets      []*DatasetRef `protobuf:"bytes,1,rep,name=datasets,proto3" json:"datasets,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListDatasetsResponse and stores the
// message info for index 11 on its state.
func (x *ListDatasetsResponse) Reset() {
	*x = ListDatasetsResponse{}
	info := &file_emulator_proto_msgTypes[11]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *ListDatasetsResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*ListDatasetsResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *ListDatasetsResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[11]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {11}.
//
// Deprecated: Use ListDatasetsResponse.ProtoReflect.Descriptor instead.
func (*ListDatasetsResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{11}
}

// GetDatasets returns x.Datasets, or nil when x is nil.
func (x *ListDatasetsResponse) GetDatasets() []*DatasetRef {
	if x == nil {
		return nil
	}
	return x.Datasets
}
// RegisterTableRequest carries a table reference and its schema. It is
// message index 12 of emulator.proto.
type RegisterTableRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	Schema        *TableSchema           `protobuf:"bytes,2,opt,name=schema,proto3" json:"schema,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterTableRequest and stores the
// message info for index 12 on its state.
func (x *RegisterTableRequest) Reset() {
	*x = RegisterTableRequest{}
	info := &file_emulator_proto_msgTypes[12]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *RegisterTableRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*RegisterTableRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *RegisterTableRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[12]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {12}.
//
// Deprecated: Use RegisterTableRequest.ProtoReflect.Descriptor instead.
func (*RegisterTableRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{12}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *RegisterTableRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}

// GetSchema returns x.Schema, or nil when x is nil.
func (x *RegisterTableRequest) GetSchema() *TableSchema {
	if x == nil {
		return nil
	}
	return x.Schema
}
// RegisterTableResponse has no payload fields. It is message index 13 of
// emulator.proto.
type RegisterTableResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterTableResponse and stores the
// message info for index 13 on its state.
func (x *RegisterTableResponse) Reset() {
	*x = RegisterTableResponse{}
	info := &file_emulator_proto_msgTypes[13]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *RegisterTableResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*RegisterTableResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *RegisterTableResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[13]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {13}.
//
// Deprecated: Use RegisterTableResponse.ProtoReflect.Descriptor instead.
func (*RegisterTableResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{13}
}
// DropTableRequest carries the table reference to drop. It is message
// index 14 of emulator.proto.
type DropTableRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DropTableRequest and stores the message
// info for index 14 on its state.
func (x *DropTableRequest) Reset() {
	*x = DropTableRequest{}
	info := &file_emulator_proto_msgTypes[14]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DropTableRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DropTableRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DropTableRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[14]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {14}.
//
// Deprecated: Use DropTableRequest.ProtoReflect.Descriptor instead.
func (*DropTableRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{14}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *DropTableRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}
// DropTableResponse has no payload fields. It is message index 15 of
// emulator.proto.
type DropTableResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DropTableResponse and stores the message
// info for index 15 on its state.
func (x *DropTableResponse) Reset() {
	*x = DropTableResponse{}
	info := &file_emulator_proto_msgTypes[15]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DropTableResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DropTableResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DropTableResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[15]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {15}.
//
// Deprecated: Use DropTableResponse.ProtoReflect.Descriptor instead.
func (*DropTableResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{15}
}
// ListTablesRequest carries the dataset whose tables are listed. It is
// message index 16 of emulator.proto.
type ListTablesRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListTablesRequest and stores the message
// info for index 16 on its state.
func (x *ListTablesRequest) Reset() {
	*x = ListTablesRequest{}
	info := &file_emulator_proto_msgTypes[16]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *ListTablesRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*ListTablesRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *ListTablesRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[16]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {16}.
//
// Deprecated: Use ListTablesRequest.ProtoReflect.Descriptor instead.
func (*ListTablesRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{16}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *ListTablesRequest) GetDataset() *DatasetRef {
	if x == nil {
		return nil
	}
	return x.Dataset
}
// ListTablesResponse carries the listed table references. It is message
// index 17 of emulator.proto.
type ListTablesResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Refs are returned in deterministic (lexicographic by table_id)
	// order. See backend/storage/storage.h::Storage::ListTables.
	Tables        []*TableRef `protobuf:"bytes,1,rep,name=tables,proto3" json:"tables,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListTablesResponse and stores the
// message info for index 17 on its state.
func (x *ListTablesResponse) Reset() {
	*x = ListTablesResponse{}
	info := &file_emulator_proto_msgTypes[17]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *ListTablesResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*ListTablesResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *ListTablesResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[17]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {17}.
//
// Deprecated: Use ListTablesResponse.ProtoReflect.Descriptor instead.
func (*ListTablesResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{17}
}

// GetTables returns x.Tables, or nil when x is nil.
func (x *ListTablesResponse) GetTables() []*TableRef {
	if x == nil {
		return nil
	}
	return x.Tables
}
// DescribeTableRequest carries the table reference to describe. It is
// message index 18 of emulator.proto.
type DescribeTableRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DescribeTableRequest and stores the
// message info for index 18 on its state.
func (x *DescribeTableRequest) Reset() {
	*x = DescribeTableRequest{}
	info := &file_emulator_proto_msgTypes[18]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DescribeTableRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DescribeTableRequest) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DescribeTableRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[18]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {18}.
//
// Deprecated: Use DescribeTableRequest.ProtoReflect.Descriptor instead.
func (*DescribeTableRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{18}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *DescribeTableRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}
// DescribeTableResponse carries a table's schema plus view-related
// fields (table type, view query, legacy-SQL flag). It is message index
// 19 of emulator.proto.
type DescribeTableResponse struct {
	state  protoimpl.MessageState `protogen:"open.v1"`
	Schema *TableSchema           `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"`
	// BigQuery REST `type` when the table is a logical view (empty for
	// physical tables). Populated by Catalog.DescribeTable when the
	// target is registered in the view registry rather than storage.
	TableType string `protobuf:"bytes,2,opt,name=table_type,json=tableType,proto3" json:"table_type,omitempty"`
	// View SQL (`view.query` on the REST Table resource).
	ViewQuery string `protobuf:"bytes,3,opt,name=view_query,json=viewQuery,proto3" json:"view_query,omitempty"`
	// Always false for GoogleSQL views; carried for REST parity.
	ViewUseLegacySql bool `protobuf:"varint,4,opt,name=view_use_legacy_sql,json=viewUseLegacySql,proto3" json:"view_use_legacy_sql,omitempty"`
	unknownFields    protoimpl.UnknownFields
	sizeCache        protoimpl.SizeCache
}

// Reset overwrites x with a zero DescribeTableResponse and stores the
// message info for index 19 on its state.
func (x *DescribeTableResponse) Reset() {
	*x = DescribeTableResponse{}
	info := &file_emulator_proto_msgTypes[19]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DescribeTableResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DescribeTableResponse) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DescribeTableResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[19]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {19}.
//
// Deprecated: Use DescribeTableResponse.ProtoReflect.Descriptor instead.
func (*DescribeTableResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{19}
}

// GetSchema returns x.Schema, or nil when x is nil.
func (x *DescribeTableResponse) GetSchema() *TableSchema {
	if x == nil {
		return nil
	}
	return x.Schema
}

// GetTableType returns x.TableType, or "" when x is nil.
func (x *DescribeTableResponse) GetTableType() string {
	if x == nil {
		return ""
	}
	return x.TableType
}

// GetViewQuery returns x.ViewQuery, or "" when x is nil.
func (x *DescribeTableResponse) GetViewQuery() string {
	if x == nil {
		return ""
	}
	return x.ViewQuery
}

// GetViewUseLegacySql returns x.ViewUseLegacySql, or false when x is nil.
func (x *DescribeTableResponse) GetViewUseLegacySql() bool {
	if x == nil {
		return false
	}
	return x.ViewUseLegacySql
}
// DataRow is a single row of stored data. Cells are positional and
// align with the columns of the target table's `TableSchema`. The
// `Cell` shape is shared with `Query`'s result rows so the wire
// types stay consistent across the two read paths.
//
// It is message index 20 of emulator.proto.
type DataRow struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Cells         []*Cell                `protobuf:"bytes,1,rep,name=cells,proto3" json:"cells,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DataRow and stores the message info for
// index 20 on its state.
func (x *DataRow) Reset() {
	*x = DataRow{}
	info := &file_emulator_proto_msgTypes[20]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String formats x via protoimpl's MessageStringOf.
func (x *DataRow) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}

// ProtoMessage is an empty marker method.
func (*DataRow) ProtoMessage() {}

// ProtoReflect returns x's message state as a protoreflect.Message,
// storing the message info first when none is loaded. A nil receiver is
// served by the message info's MessageOf.
func (x *DataRow) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[20]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP and
// the index path {20}.
//
// Deprecated: Use DataRow.ProtoReflect.Descriptor instead.
func (*DataRow) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{20}
}

// GetCells returns x.Cells, or nil when x is nil.
func (x *DataRow) GetCells() []*Cell {
	if x == nil {
		return nil
	}
	return x.Cells
}
// InsertRowsRequest pairs a table reference (Table) with a list of DataRow
// values (Rows).
type InsertRowsRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	Rows          []*DataRow             `protobuf:"bytes,2,rep,name=rows,proto3" json:"rows,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero InsertRowsRequest and records the message
// info for file_emulator_proto_msgTypes[21] in its message state.
func (x *InsertRowsRequest) Reset() {
	*x = InsertRowsRequest{}
	info := &file_emulator_proto_msgTypes[21]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *InsertRowsRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*InsertRowsRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *InsertRowsRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[21]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{21}.
//
// Deprecated: Use InsertRowsRequest.ProtoReflect.Descriptor instead.
func (*InsertRowsRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{21}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *InsertRowsRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}

// GetRows returns x.Rows, or nil when x is nil.
func (x *InsertRowsRequest) GetRows() []*DataRow {
	if x == nil {
		return nil
	}
	return x.Rows
}
// InsertRowsResponse declares no message fields of its own; it contains only
// the protoimpl bookkeeping fields.
type InsertRowsResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero InsertRowsResponse and records the message
// info for file_emulator_proto_msgTypes[22] in its message state.
func (x *InsertRowsResponse) Reset() {
	*x = InsertRowsResponse{}
	info := &file_emulator_proto_msgTypes[22]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *InsertRowsResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*InsertRowsResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *InsertRowsResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[22]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{22}.
//
// Deprecated: Use InsertRowsResponse.ProtoReflect.Descriptor instead.
func (*InsertRowsResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{22}
}
// ListRowsRequest identifies a table (Table) and a window of its rows via
// StartIndex and MaxResults.
type ListRowsRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	Table *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	// 0-based index of the first row to return; rows ahead of this index
	// are skipped.
	StartIndex int64 `protobuf:"varint,2,opt,name=start_index,json=startIndex,proto3" json:"start_index,omitempty"`
	// Upper bound on the number of rows in the response. A non-positive
	// value returns every remaining row from `start_index`.
	MaxResults    int64 `protobuf:"varint,3,opt,name=max_results,json=maxResults,proto3" json:"max_results,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero ListRowsRequest and records the message
// info for file_emulator_proto_msgTypes[23] in its message state.
func (x *ListRowsRequest) Reset() {
	*x = ListRowsRequest{}
	info := &file_emulator_proto_msgTypes[23]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ListRowsRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*ListRowsRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowsRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[23]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{23}.
//
// Deprecated: Use ListRowsRequest.ProtoReflect.Descriptor instead.
func (*ListRowsRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{23}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *ListRowsRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}

// GetStartIndex returns x.StartIndex, or 0 when x is nil.
func (x *ListRowsRequest) GetStartIndex() int64 {
	if x == nil {
		return 0
	}
	return x.StartIndex
}

// GetMaxResults returns x.MaxResults, or 0 when x is nil.
func (x *ListRowsRequest) GetMaxResults() int64 {
	if x == nil {
		return 0
	}
	return x.MaxResults
}
// RoutineRef holds the project, dataset, and routine identifier strings of
// a routine.
type RoutineRef struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	ProjectId     string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
	DatasetId     string                 `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
	RoutineId     string                 `protobuf:"bytes,3,opt,name=routine_id,json=routineId,proto3" json:"routine_id,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero RoutineRef and records the message info
// for file_emulator_proto_msgTypes[24] in its message state.
func (x *RoutineRef) Reset() {
	*x = RoutineRef{}
	info := &file_emulator_proto_msgTypes[24]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *RoutineRef) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*RoutineRef) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *RoutineRef) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[24]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{24}.
//
// Deprecated: Use RoutineRef.ProtoReflect.Descriptor instead.
func (*RoutineRef) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{24}
}

// GetProjectId returns x.ProjectId, or the empty string when x is nil.
func (x *RoutineRef) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}

// GetDatasetId returns x.DatasetId, or the empty string when x is nil.
func (x *RoutineRef) GetDatasetId() string {
	if x == nil {
		return ""
	}
	return x.DatasetId
}

// GetRoutineId returns x.RoutineId, or the empty string when x is nil.
func (x *RoutineRef) GetRoutineId() string {
	if x == nil {
		return ""
	}
	return x.RoutineId
}
// RoutineDescriptor holds a routine reference together with string fields
// for its type, language, definition body, DDL SQL, and signature JSON.
type RoutineDescriptor struct {
	state   protoimpl.MessageState `protogen:"open.v1"`
	Routine *RoutineRef            `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
	// One of: SCALAR_FUNCTION | AGGREGATE_FUNCTION | TABLE_VALUED_FUNCTION | PROCEDURE
	RoutineType    string `protobuf:"bytes,2,opt,name=routine_type,json=routineType,proto3" json:"routine_type,omitempty"`
	Language       string `protobuf:"bytes,3,opt,name=language,proto3" json:"language,omitempty"`
	DefinitionBody string `protobuf:"bytes,4,opt,name=definition_body,json=definitionBody,proto3" json:"definition_body,omitempty"`
	DdlSql         string `protobuf:"bytes,5,opt,name=ddl_sql,json=ddlSql,proto3" json:"ddl_sql,omitempty"`
	SignatureJson  string `protobuf:"bytes,6,opt,name=signature_json,json=signatureJson,proto3" json:"signature_json,omitempty"`
	unknownFields  protoimpl.UnknownFields
	sizeCache      protoimpl.SizeCache
}

// Reset replaces *x with the zero RoutineDescriptor and records the message
// info for file_emulator_proto_msgTypes[25] in its message state.
func (x *RoutineDescriptor) Reset() {
	*x = RoutineDescriptor{}
	info := &file_emulator_proto_msgTypes[25]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *RoutineDescriptor) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*RoutineDescriptor) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *RoutineDescriptor) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[25]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{25}.
//
// Deprecated: Use RoutineDescriptor.ProtoReflect.Descriptor instead.
func (*RoutineDescriptor) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{25}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *RoutineDescriptor) GetRoutine() *RoutineRef {
	if x == nil {
		return nil
	}
	return x.Routine
}

// GetRoutineType returns x.RoutineType, or the empty string when x is nil.
func (x *RoutineDescriptor) GetRoutineType() string {
	if x == nil {
		return ""
	}
	return x.RoutineType
}

// GetLanguage returns x.Language, or the empty string when x is nil.
func (x *RoutineDescriptor) GetLanguage() string {
	if x == nil {
		return ""
	}
	return x.Language
}

// GetDefinitionBody returns x.DefinitionBody, or the empty string when x is
// nil.
func (x *RoutineDescriptor) GetDefinitionBody() string {
	if x == nil {
		return ""
	}
	return x.DefinitionBody
}

// GetDdlSql returns x.DdlSql, or the empty string when x is nil.
func (x *RoutineDescriptor) GetDdlSql() string {
	if x == nil {
		return ""
	}
	return x.DdlSql
}

// GetSignatureJson returns x.SignatureJson, or the empty string when x is
// nil.
func (x *RoutineDescriptor) GetSignatureJson() string {
	if x == nil {
		return ""
	}
	return x.SignatureJson
}
// ListRoutinesRequest holds a single dataset reference (Dataset).
type ListRoutinesRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero ListRoutinesRequest and records the
// message info for file_emulator_proto_msgTypes[26] in its message state.
func (x *ListRoutinesRequest) Reset() {
	*x = ListRoutinesRequest{}
	info := &file_emulator_proto_msgTypes[26]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ListRoutinesRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*ListRoutinesRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRoutinesRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[26]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{26}.
//
// Deprecated: Use ListRoutinesRequest.ProtoReflect.Descriptor instead.
func (*ListRoutinesRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{26}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *ListRoutinesRequest) GetDataset() *DatasetRef {
	if x == nil {
		return nil
	}
	return x.Dataset
}
// ListRoutinesResponse holds a list of RoutineDescriptor values (Routines).
type ListRoutinesResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Routines      []*RoutineDescriptor   `protobuf:"bytes,1,rep,name=routines,proto3" json:"routines,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero ListRoutinesResponse and records the
// message info for file_emulator_proto_msgTypes[27] in its message state.
func (x *ListRoutinesResponse) Reset() {
	*x = ListRoutinesResponse{}
	info := &file_emulator_proto_msgTypes[27]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ListRoutinesResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*ListRoutinesResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRoutinesResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[27]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{27}.
//
// Deprecated: Use ListRoutinesResponse.ProtoReflect.Descriptor instead.
func (*ListRoutinesResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{27}
}

// GetRoutines returns x.Routines, or nil when x is nil.
func (x *ListRoutinesResponse) GetRoutines() []*RoutineDescriptor {
	if x == nil {
		return nil
	}
	return x.Routines
}
// GetRoutineRequest holds a single routine reference (Routine).
type GetRoutineRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Routine       *RoutineRef            `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero GetRoutineRequest and records the message
// info for file_emulator_proto_msgTypes[28] in its message state.
func (x *GetRoutineRequest) Reset() {
	*x = GetRoutineRequest{}
	info := &file_emulator_proto_msgTypes[28]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *GetRoutineRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*GetRoutineRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *GetRoutineRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[28]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{28}.
//
// Deprecated: Use GetRoutineRequest.ProtoReflect.Descriptor instead.
func (*GetRoutineRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{28}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *GetRoutineRequest) GetRoutine() *RoutineRef {
	if x == nil {
		return nil
	}
	return x.Routine
}
// GetRoutineResponse holds a single RoutineDescriptor (Routine).
type GetRoutineResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Routine       *RoutineDescriptor     `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero GetRoutineResponse and records the message
// info for file_emulator_proto_msgTypes[29] in its message state.
func (x *GetRoutineResponse) Reset() {
	*x = GetRoutineResponse{}
	info := &file_emulator_proto_msgTypes[29]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *GetRoutineResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*GetRoutineResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *GetRoutineResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[29]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{29}.
//
// Deprecated: Use GetRoutineResponse.ProtoReflect.Descriptor instead.
func (*GetRoutineResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{29}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *GetRoutineResponse) GetRoutine() *RoutineDescriptor {
	if x == nil {
		return nil
	}
	return x.Routine
}
// UpsertRoutineRequest holds a single RoutineDescriptor (Routine).
type UpsertRoutineRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Routine       *RoutineDescriptor     `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero UpsertRoutineRequest and records the
// message info for file_emulator_proto_msgTypes[30] in its message state.
func (x *UpsertRoutineRequest) Reset() {
	*x = UpsertRoutineRequest{}
	info := &file_emulator_proto_msgTypes[30]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *UpsertRoutineRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*UpsertRoutineRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRoutineRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[30]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{30}.
//
// Deprecated: Use UpsertRoutineRequest.ProtoReflect.Descriptor instead.
func (*UpsertRoutineRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{30}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *UpsertRoutineRequest) GetRoutine() *RoutineDescriptor {
	if x == nil {
		return nil
	}
	return x.Routine
}
// UpsertRoutineResponse declares no message fields of its own; it contains
// only the protoimpl bookkeeping fields.
type UpsertRoutineResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero UpsertRoutineResponse and records the
// message info for file_emulator_proto_msgTypes[31] in its message state.
func (x *UpsertRoutineResponse) Reset() {
	*x = UpsertRoutineResponse{}
	info := &file_emulator_proto_msgTypes[31]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *UpsertRoutineResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*UpsertRoutineResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRoutineResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[31]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{31}.
//
// Deprecated: Use UpsertRoutineResponse.ProtoReflect.Descriptor instead.
func (*UpsertRoutineResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{31}
}
// DeleteRoutineRequest holds a single routine reference (Routine).
type DeleteRoutineRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Routine       *RoutineRef            `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero DeleteRoutineRequest and records the
// message info for file_emulator_proto_msgTypes[32] in its message state.
func (x *DeleteRoutineRequest) Reset() {
	*x = DeleteRoutineRequest{}
	info := &file_emulator_proto_msgTypes[32]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *DeleteRoutineRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*DeleteRoutineRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRoutineRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[32]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{32}.
//
// Deprecated: Use DeleteRoutineRequest.ProtoReflect.Descriptor instead.
func (*DeleteRoutineRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{32}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *DeleteRoutineRequest) GetRoutine() *RoutineRef {
	if x == nil {
		return nil
	}
	return x.Routine
}
// DeleteRoutineResponse declares no message fields of its own; it contains
// only the protoimpl bookkeeping fields.
type DeleteRoutineResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero DeleteRoutineResponse and records the
// message info for file_emulator_proto_msgTypes[33] in its message state.
func (x *DeleteRoutineResponse) Reset() {
	*x = DeleteRoutineResponse{}
	info := &file_emulator_proto_msgTypes[33]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *DeleteRoutineResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*DeleteRoutineResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRoutineResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[33]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{33}.
//
// Deprecated: Use DeleteRoutineResponse.ProtoReflect.Descriptor instead.
func (*DeleteRoutineResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{33}
}
// RowAccessPolicy holds a table reference, a policy ID, a filter predicate
// string, a list of grantee strings, and creation / last-modified times in
// milliseconds (per the *_time_ms field names).
type RowAccessPolicy struct {
	state              protoimpl.MessageState `protogen:"open.v1"`
	Table              *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	PolicyId           string                 `protobuf:"bytes,2,opt,name=policy_id,json=policyId,proto3" json:"policy_id,omitempty"`
	FilterPredicate    string                 `protobuf:"bytes,3,opt,name=filter_predicate,json=filterPredicate,proto3" json:"filter_predicate,omitempty"`
	Grantees           []string               `protobuf:"bytes,4,rep,name=grantees,proto3" json:"grantees,omitempty"`
	CreationTimeMs     int64                  `protobuf:"varint,5,opt,name=creation_time_ms,json=creationTimeMs,proto3" json:"creation_time_ms,omitempty"`
	LastModifiedTimeMs int64                  `protobuf:"varint,6,opt,name=last_modified_time_ms,json=lastModifiedTimeMs,proto3" json:"last_modified_time_ms,omitempty"`
	unknownFields      protoimpl.UnknownFields
	sizeCache          protoimpl.SizeCache
}

// Reset replaces *x with the zero RowAccessPolicy and records the message
// info for file_emulator_proto_msgTypes[34] in its message state.
func (x *RowAccessPolicy) Reset() {
	*x = RowAccessPolicy{}
	info := &file_emulator_proto_msgTypes[34]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *RowAccessPolicy) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*RowAccessPolicy) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *RowAccessPolicy) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[34]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{34}.
//
// Deprecated: Use RowAccessPolicy.ProtoReflect.Descriptor instead.
func (*RowAccessPolicy) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{34}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *RowAccessPolicy) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}

// GetPolicyId returns x.PolicyId, or the empty string when x is nil.
func (x *RowAccessPolicy) GetPolicyId() string {
	if x == nil {
		return ""
	}
	return x.PolicyId
}

// GetFilterPredicate returns x.FilterPredicate, or the empty string when x
// is nil.
func (x *RowAccessPolicy) GetFilterPredicate() string {
	if x == nil {
		return ""
	}
	return x.FilterPredicate
}

// GetGrantees returns x.Grantees, or nil when x is nil.
func (x *RowAccessPolicy) GetGrantees() []string {
	if x == nil {
		return nil
	}
	return x.Grantees
}

// GetCreationTimeMs returns x.CreationTimeMs, or 0 when x is nil.
func (x *RowAccessPolicy) GetCreationTimeMs() int64 {
	if x == nil {
		return 0
	}
	return x.CreationTimeMs
}

// GetLastModifiedTimeMs returns x.LastModifiedTimeMs, or 0 when x is nil.
func (x *RowAccessPolicy) GetLastModifiedTimeMs() int64 {
	if x == nil {
		return 0
	}
	return x.LastModifiedTimeMs
}
// UpsertRowAccessPolicyRequest holds a single RowAccessPolicy (Policy).
type UpsertRowAccessPolicyRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Policy        *RowAccessPolicy       `protobuf:"bytes,1,opt,name=policy,proto3" json:"policy,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero UpsertRowAccessPolicyRequest and records
// the message info for file_emulator_proto_msgTypes[35] in its message state.
func (x *UpsertRowAccessPolicyRequest) Reset() {
	*x = UpsertRowAccessPolicyRequest{}
	info := &file_emulator_proto_msgTypes[35]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *UpsertRowAccessPolicyRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*UpsertRowAccessPolicyRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRowAccessPolicyRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[35]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{35}.
//
// Deprecated: Use UpsertRowAccessPolicyRequest.ProtoReflect.Descriptor instead.
func (*UpsertRowAccessPolicyRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{35}
}

// GetPolicy returns x.Policy, or nil when x is nil.
func (x *UpsertRowAccessPolicyRequest) GetPolicy() *RowAccessPolicy {
	if x == nil {
		return nil
	}
	return x.Policy
}
// UpsertRowAccessPolicyResponse holds a single RowAccessPolicy (Policy).
type UpsertRowAccessPolicyResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Policy        *RowAccessPolicy       `protobuf:"bytes,1,opt,name=policy,proto3" json:"policy,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero UpsertRowAccessPolicyResponse and records
// the message info for file_emulator_proto_msgTypes[36] in its message state.
func (x *UpsertRowAccessPolicyResponse) Reset() {
	*x = UpsertRowAccessPolicyResponse{}
	info := &file_emulator_proto_msgTypes[36]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *UpsertRowAccessPolicyResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*UpsertRowAccessPolicyResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRowAccessPolicyResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[36]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{36}.
//
// Deprecated: Use UpsertRowAccessPolicyResponse.ProtoReflect.Descriptor instead.
func (*UpsertRowAccessPolicyResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{36}
}

// GetPolicy returns x.Policy, or nil when x is nil.
func (x *UpsertRowAccessPolicyResponse) GetPolicy() *RowAccessPolicy {
	if x == nil {
		return nil
	}
	return x.Policy
}
// DeleteRowAccessPolicyRequest holds a table reference (Table) and a policy
// ID string (PolicyId).
type DeleteRowAccessPolicyRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	PolicyId      string                 `protobuf:"bytes,2,opt,name=policy_id,json=policyId,proto3" json:"policy_id,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero DeleteRowAccessPolicyRequest and records
// the message info for file_emulator_proto_msgTypes[37] in its message state.
func (x *DeleteRowAccessPolicyRequest) Reset() {
	*x = DeleteRowAccessPolicyRequest{}
	info := &file_emulator_proto_msgTypes[37]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *DeleteRowAccessPolicyRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*DeleteRowAccessPolicyRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRowAccessPolicyRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[37]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{37}.
//
// Deprecated: Use DeleteRowAccessPolicyRequest.ProtoReflect.Descriptor instead.
func (*DeleteRowAccessPolicyRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{37}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *DeleteRowAccessPolicyRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}

// GetPolicyId returns x.PolicyId, or the empty string when x is nil.
func (x *DeleteRowAccessPolicyRequest) GetPolicyId() string {
	if x == nil {
		return ""
	}
	return x.PolicyId
}
// DeleteRowAccessPolicyResponse declares no message fields of its own; it
// contains only the protoimpl bookkeeping fields.
type DeleteRowAccessPolicyResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero DeleteRowAccessPolicyResponse and records
// the message info for file_emulator_proto_msgTypes[38] in its message state.
func (x *DeleteRowAccessPolicyResponse) Reset() {
	*x = DeleteRowAccessPolicyResponse{}
	info := &file_emulator_proto_msgTypes[38]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *DeleteRowAccessPolicyResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*DeleteRowAccessPolicyResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRowAccessPolicyResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[38]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{38}.
//
// Deprecated: Use DeleteRowAccessPolicyResponse.ProtoReflect.Descriptor instead.
func (*DeleteRowAccessPolicyResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{38}
}
// ListRowAccessPoliciesRequest holds a single table reference (Table).
type ListRowAccessPoliciesRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero ListRowAccessPoliciesRequest and records
// the message info for file_emulator_proto_msgTypes[39] in its message state.
func (x *ListRowAccessPoliciesRequest) Reset() {
	*x = ListRowAccessPoliciesRequest{}
	info := &file_emulator_proto_msgTypes[39]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ListRowAccessPoliciesRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*ListRowAccessPoliciesRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowAccessPoliciesRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[39]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{39}.
//
// Deprecated: Use ListRowAccessPoliciesRequest.ProtoReflect.Descriptor instead.
func (*ListRowAccessPoliciesRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{39}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *ListRowAccessPoliciesRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}
// ListRowAccessPoliciesResponse holds a list of RowAccessPolicy values
// (Policies).
type ListRowAccessPoliciesResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Policies      []*RowAccessPolicy     `protobuf:"bytes,1,rep,name=policies,proto3" json:"policies,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero ListRowAccessPoliciesResponse and records
// the message info for file_emulator_proto_msgTypes[40] in its message state.
func (x *ListRowAccessPoliciesResponse) Reset() {
	*x = ListRowAccessPoliciesResponse{}
	info := &file_emulator_proto_msgTypes[40]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ListRowAccessPoliciesResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*ListRowAccessPoliciesResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowAccessPoliciesResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[40]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{40}.
//
// Deprecated: Use ListRowAccessPoliciesResponse.ProtoReflect.Descriptor instead.
func (*ListRowAccessPoliciesResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{40}
}

// GetPolicies returns x.Policies, or nil when x is nil.
func (x *ListRowAccessPoliciesResponse) GetPolicies() []*RowAccessPolicy {
	if x == nil {
		return nil
	}
	return x.Policies
}
// ColumnGovernance holds a column name together with its policy tags, mask
// kind, mask grantees, and default mask value.
type ColumnGovernance struct {
	state      protoimpl.MessageState `protogen:"open.v1"`
	ColumnName string                 `protobuf:"bytes,1,opt,name=column_name,json=columnName,proto3" json:"column_name,omitempty"`
	PolicyTags []string               `protobuf:"bytes,2,rep,name=policy_tags,json=policyTags,proto3" json:"policy_tags,omitempty"`
	// One of: NULLIFY | SHA256 | DEFAULT_VALUE | DENIED | NONE
	MaskKind         string   `protobuf:"bytes,3,opt,name=mask_kind,json=maskKind,proto3" json:"mask_kind,omitempty"`
	MaskGrantees     []string `protobuf:"bytes,4,rep,name=mask_grantees,json=maskGrantees,proto3" json:"mask_grantees,omitempty"`
	DefaultMaskValue string   `protobuf:"bytes,5,opt,name=default_mask_value,json=defaultMaskValue,proto3" json:"default_mask_value,omitempty"`
	unknownFields    protoimpl.UnknownFields
	sizeCache        protoimpl.SizeCache
}

// Reset replaces *x with the zero ColumnGovernance and records the message
// info for file_emulator_proto_msgTypes[41] in its message state.
func (x *ColumnGovernance) Reset() {
	*x = ColumnGovernance{}
	info := &file_emulator_proto_msgTypes[41]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ColumnGovernance) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*ColumnGovernance) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ColumnGovernance) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[41]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{41}.
//
// Deprecated: Use ColumnGovernance.ProtoReflect.Descriptor instead.
func (*ColumnGovernance) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{41}
}

// GetColumnName returns x.ColumnName, or the empty string when x is nil.
func (x *ColumnGovernance) GetColumnName() string {
	if x == nil {
		return ""
	}
	return x.ColumnName
}

// GetPolicyTags returns x.PolicyTags, or nil when x is nil.
func (x *ColumnGovernance) GetPolicyTags() []string {
	if x == nil {
		return nil
	}
	return x.PolicyTags
}

// GetMaskKind returns x.MaskKind, or the empty string when x is nil.
func (x *ColumnGovernance) GetMaskKind() string {
	if x == nil {
		return ""
	}
	return x.MaskKind
}

// GetMaskGrantees returns x.MaskGrantees, or nil when x is nil.
func (x *ColumnGovernance) GetMaskGrantees() []string {
	if x == nil {
		return nil
	}
	return x.MaskGrantees
}

// GetDefaultMaskValue returns x.DefaultMaskValue, or the empty string when x
// is nil.
func (x *ColumnGovernance) GetDefaultMaskValue() string {
	if x == nil {
		return ""
	}
	return x.DefaultMaskValue
}
// SetColumnGovernanceRequest holds a table reference (Table) and a
// ColumnGovernance value (Column).
type SetColumnGovernanceRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
	Column        *ColumnGovernance      `protobuf:"bytes,2,opt,name=column,proto3" json:"column,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero SetColumnGovernanceRequest and records the
// message info for file_emulator_proto_msgTypes[42] in its message state.
func (x *SetColumnGovernanceRequest) Reset() {
	*x = SetColumnGovernanceRequest{}
	info := &file_emulator_proto_msgTypes[42]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SetColumnGovernanceRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*SetColumnGovernanceRequest) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *SetColumnGovernanceRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[42]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{42}.
//
// Deprecated: Use SetColumnGovernanceRequest.ProtoReflect.Descriptor instead.
func (*SetColumnGovernanceRequest) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{42}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *SetColumnGovernanceRequest) GetTable() *TableRef {
	if x == nil {
		return nil
	}
	return x.Table
}

// GetColumn returns x.Column, or nil when x is nil.
func (x *SetColumnGovernanceRequest) GetColumn() *ColumnGovernance {
	if x == nil {
		return nil
	}
	return x.Column
}
// SetColumnGovernanceResponse declares no message fields of its own; it
// contains only the protoimpl bookkeeping fields.
type SetColumnGovernanceResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset replaces *x with the zero SetColumnGovernanceResponse and records
// the message info for file_emulator_proto_msgTypes[43] in its message state.
func (x *SetColumnGovernanceResponse) Reset() {
	*x = SetColumnGovernanceResponse{}
	info := &file_emulator_proto_msgTypes[43]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SetColumnGovernanceResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*SetColumnGovernanceResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *SetColumnGovernanceResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[43]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{43}.
//
// Deprecated: Use SetColumnGovernanceResponse.ProtoReflect.Descriptor instead.
func (*SetColumnGovernanceResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{43}
}
// ListRowsResponse holds a page of rows (Rows) plus the TotalRows and
// NextStartIndex counters described on the fields below.
type ListRowsResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	Rows  []*DataRow             `protobuf:"bytes,1,rep,name=rows,proto3" json:"rows,omitempty"`
	// Number of rows in the table at the moment the request was served;
	// the gateway surfaces this as `totalRows` in the REST response.
	TotalRows int64 `protobuf:"varint,2,opt,name=total_rows,json=totalRows,proto3" json:"total_rows,omitempty"`
	// One past the index of the last row returned. The gateway uses this
	// to emit a `pageToken` (when `next_start_index < total_rows`) or to
	// omit it (when the page reached the end of the table).
	NextStartIndex int64 `protobuf:"varint,3,opt,name=next_start_index,json=nextStartIndex,proto3" json:"next_start_index,omitempty"`
	unknownFields  protoimpl.UnknownFields
	sizeCache      protoimpl.SizeCache
}

// Reset replaces *x with the zero ListRowsResponse and records the message
// info for file_emulator_proto_msgTypes[44] in its message state.
func (x *ListRowsResponse) Reset() {
	*x = ListRowsResponse{}
	info := &file_emulator_proto_msgTypes[44]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ListRowsResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method with no behavior.
func (*ListRowsResponse) ProtoMessage() {}

// ProtoReflect returns the protoreflect.Message for x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowsResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[44]
	if x == nil {
		return info.MessageOf(x)
	}
	st := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if st.LoadMessageInfo() == nil {
		st.StoreMessageInfo(info)
	}
	return st
}

// Descriptor returns file_emulator_proto_rawDescGZIP() and the index slice
// []int{44}.
//
// Deprecated: Use ListRowsResponse.ProtoReflect.Descriptor instead.
func (*ListRowsResponse) Descriptor() ([]byte, []int) {
	raw := file_emulator_proto_rawDescGZIP()
	return raw, []int{44}
}

// GetRows returns x.Rows, or nil when x is nil.
func (x *ListRowsResponse) GetRows() []*DataRow {
	if x == nil {
		return nil
	}
	return x.Rows
}

// GetTotalRows returns x.TotalRows, or 0 when x is nil.
func (x *ListRowsResponse) GetTotalRows() int64 {
	if x == nil {
		return 0
	}
	return x.TotalRows
}

// GetNextStartIndex returns x.NextStartIndex, or 0 when x is nil.
func (x *ListRowsResponse) GetNextStartIndex() int64 {
	if x == nil {
		return 0
	}
	return x.NextStartIndex
}
// QueryRequest bundles the SQL text of a query with its project, default
// dataset, parameters, legacy-SQL flag and caller email.
type QueryRequest struct {
	state     protoimpl.MessageState `protogen:"open.v1"`
	ProjectId string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
	// Default dataset for unqualified table references.
	DefaultDatasetId string `protobuf:"bytes,2,opt,name=default_dataset_id,json=defaultDatasetId,proto3" json:"default_dataset_id,omitempty"`
	Sql              string `protobuf:"bytes,3,opt,name=sql,proto3" json:"sql,omitempty"`
	// Optional: query parameters keyed by name. Positional parameters use
	// an empty key (BigQuery's @0, @1, ... convention is unrolled here).
	Parameters   map[string]*QueryParameter `protobuf:"bytes,4,rep,name=parameters,proto3" json:"parameters,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
	UseLegacySql bool                       `protobuf:"varint,5,opt,name=use_legacy_sql,json=useLegacySql,proto3" json:"use_legacy_sql,omitempty"`
	// Synthetic caller email from the gateway auth middleware. Empty
	// defaults to emulator@bigquery.local at the engine boundary.
	PrincipalEmail string `protobuf:"bytes,6,opt,name=principal_email,json=principalEmail,proto3" json:"principal_email,omitempty"`
	unknownFields  protoimpl.UnknownFields
	sizeCache      protoimpl.SizeCache
}

// Reset overwrites x with the zero QueryRequest and then stores the message
// info from index 45 of file_emulator_proto_msgTypes in its state.
func (x *QueryRequest) Reset() {
	*x = QueryRequest{}
	info := &file_emulator_proto_msgTypes[45]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *QueryRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*QueryRequest) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *QueryRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[45]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{45}.
//
// Deprecated: Use QueryRequest.ProtoReflect.Descriptor instead.
func (*QueryRequest) Descriptor() ([]byte, []int) {
	path := []int{45}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetProjectId returns x.ProjectId, or "" when x is nil.
func (x *QueryRequest) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}

// GetDefaultDatasetId returns x.DefaultDatasetId, or "" when x is nil.
func (x *QueryRequest) GetDefaultDatasetId() string {
	if x == nil {
		return ""
	}
	return x.DefaultDatasetId
}

// GetSql returns x.Sql, or "" when x is nil.
func (x *QueryRequest) GetSql() string {
	if x == nil {
		return ""
	}
	return x.Sql
}

// GetParameters returns x.Parameters, or nil when x is nil.
func (x *QueryRequest) GetParameters() map[string]*QueryParameter {
	if x == nil {
		return nil
	}
	return x.Parameters
}

// GetUseLegacySql returns x.UseLegacySql, or false when x is nil.
func (x *QueryRequest) GetUseLegacySql() bool {
	if x == nil {
		return false
	}
	return x.UseLegacySql
}

// GetPrincipalEmail returns x.PrincipalEmail, or "" when x is nil.
func (x *QueryRequest) GetPrincipalEmail() string {
	if x == nil {
		return ""
	}
	return x.PrincipalEmail
}
// QueryParameter holds one query parameter as a type-kind name, a
// JSON-encoded value and an optional JSON type descriptor.
type QueryParameter struct {
	state     protoimpl.MessageState `protogen:"open.v1"`
	TypeKind  string                 `protobuf:"bytes,1,opt,name=type_kind,json=typeKind,proto3" json:"type_kind,omitempty"`    // googlesql TypeKind name (e.g. INT64, STRING).
	ValueJson string                 `protobuf:"bytes,2,opt,name=value_json,json=valueJson,proto3" json:"value_json,omitempty"` // JSON-encoded literal value.
	// Optional STRUCT/ARRAY field-type descriptor (gateway-encoded from
	// REST `parameterType`; empty for scalar parameters).
	TypeJson      string `protobuf:"bytes,3,opt,name=type_json,json=typeJson,proto3" json:"type_json,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero QueryParameter and then stores the message
// info from index 46 of file_emulator_proto_msgTypes in its state.
func (x *QueryParameter) Reset() {
	*x = QueryParameter{}
	info := &file_emulator_proto_msgTypes[46]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *QueryParameter) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*QueryParameter) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *QueryParameter) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[46]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{46}.
//
// Deprecated: Use QueryParameter.ProtoReflect.Descriptor instead.
func (*QueryParameter) Descriptor() ([]byte, []int) {
	path := []int{46}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetTypeKind returns x.TypeKind, or "" when x is nil.
func (x *QueryParameter) GetTypeKind() string {
	if x == nil {
		return ""
	}
	return x.TypeKind
}

// GetValueJson returns x.ValueJson, or "" when x is nil.
func (x *QueryParameter) GetValueJson() string {
	if x == nil {
		return ""
	}
	return x.ValueJson
}

// GetTypeJson returns x.TypeJson, or "" when x is nil.
func (x *QueryParameter) GetTypeJson() string {
	if x == nil {
		return ""
	}
	return x.TypeJson
}
// DryRunResponse holds a table schema and an estimated-bytes-processed count.
type DryRunResponse struct {
	state                   protoimpl.MessageState `protogen:"open.v1"`
	Schema                  *TableSchema           `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"`
	EstimatedBytesProcessed int64                  `protobuf:"varint,2,opt,name=estimated_bytes_processed,json=estimatedBytesProcessed,proto3" json:"estimated_bytes_processed,omitempty"`
	unknownFields           protoimpl.UnknownFields
	sizeCache               protoimpl.SizeCache
}

// Reset overwrites x with the zero DryRunResponse and then stores the message
// info from index 47 of file_emulator_proto_msgTypes in its state.
func (x *DryRunResponse) Reset() {
	*x = DryRunResponse{}
	info := &file_emulator_proto_msgTypes[47]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *DryRunResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DryRunResponse) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *DryRunResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[47]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{47}.
//
// Deprecated: Use DryRunResponse.ProtoReflect.Descriptor instead.
func (*DryRunResponse) Descriptor() ([]byte, []int) {
	path := []int{47}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetSchema returns x.Schema, or nil when x is nil.
func (x *DryRunResponse) GetSchema() *TableSchema {
	if x == nil {
		return nil
	}
	return x.Schema
}

// GetEstimatedBytesProcessed returns x.EstimatedBytesProcessed, or 0 when x
// is nil.
func (x *DryRunResponse) GetEstimatedBytesProcessed() int64 {
	if x == nil {
		return 0
	}
	return x.EstimatedBytesProcessed
}
// QueryResultRow is a single message of a query reply stream; the comment on
// its fields lists the kinds of message the stream carries.
type QueryResultRow struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// The stream carries one of six message kinds:
	//   - `schema` only — emitted as the first message of every
	//     `ExecuteQuery` reply for SELECT-shaped queries.
	//   - `cells` only — emitted once per result row.
	//   - `dml_stats` only — emitted as the final message of an
	//     INSERT / UPDATE / DELETE / MERGE reply, after any optional
	//     THEN-RETURN rows. Carries the per-statement modification
	//     counts the gateway folds into BigQuery's REST `dmlStats`
	//     and `numDmlAffectedRows` fields.
	//   - `statement_type` only — emitted as the trailing message of
	//     every successful reply (SELECT, DML, and DDL alike). The
	//     value is one of the canonical BigQuery REST statement-type
	//     strings (`SELECT`, `INSERT`, `CREATE_TABLE`, ...) the
	//     gateway folds into the
	//     `QueryResponse.statistics.query.statementType` envelope so
	//     callers can tell DDL / metadata / catalog operations apart
	//     from SELECTs and DML at the response layer. Routes that
	//     produce no recognizable BigQuery shape (today: nothing in
	//     the supported surface) leave the field empty so the gateway
	//     omits the envelope entirely.
	//   - `phase_timings` only — emitted after the last data row (or
	//     dml_stats) and before the `statement_type` trailer. Carries
	//     per-phase wall times in microseconds for emulator-internal
	//     performance debugging; the gateway surfaces this on loopback
	//     callers only as `Job.statistics.query.emulatorPhases`.
	//   - `emulator_route` only — emitted alongside `statement_type`
	//     as the trailing pair of every successful reply. The value
	//     is the canonical lowercase-snake spelling of the
	//     `Disposition` the coordinator's `RouteClassifier` chose
	//     (`duckdb_native`, `duckdb_rewrite`, `duckdb_udf`,
	//     `semantic_executor`, `control_op`, `local_stub`,
	//     `unsupported`; mirrors `backend/engine/disposition.cc`'s
	//     `DispositionToString`). The gateway surfaces this on the
	//     `Job.statistics.query.emulatorRoute` field only to
	//     loopback callers (see
	//     `gateway/middleware/emulator_route.go`) so it stays an
	//     emulator-internal debug signal the conformance harness
	//     reads back to assert per-query routing decisions, never a
	//     stable wire field for BigQuery client libraries.
	Schema        *TableSchema  `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"`
	Cells         []*Cell       `protobuf:"bytes,2,rep,name=cells,proto3" json:"cells,omitempty"`
	DmlStats      *DmlStats     `protobuf:"bytes,3,opt,name=dml_stats,json=dmlStats,proto3" json:"dml_stats,omitempty"`
	StatementType string        `protobuf:"bytes,4,opt,name=statement_type,json=statementType,proto3" json:"statement_type,omitempty"`
	EmulatorRoute string        `protobuf:"bytes,5,opt,name=emulator_route,json=emulatorRoute,proto3" json:"emulator_route,omitempty"`
	PhaseTimings  *PhaseTimings `protobuf:"bytes,6,opt,name=phase_timings,json=phaseTimings,proto3" json:"phase_timings,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero QueryResultRow and then stores the message
// info from index 48 of file_emulator_proto_msgTypes in its state.
func (x *QueryResultRow) Reset() {
	*x = QueryResultRow{}
	info := &file_emulator_proto_msgTypes[48]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *QueryResultRow) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*QueryResultRow) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *QueryResultRow) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[48]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{48}.
//
// Deprecated: Use QueryResultRow.ProtoReflect.Descriptor instead.
func (*QueryResultRow) Descriptor() ([]byte, []int) {
	path := []int{48}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetSchema returns x.Schema, or nil when x is nil.
func (x *QueryResultRow) GetSchema() *TableSchema {
	if x == nil {
		return nil
	}
	return x.Schema
}

// GetCells returns x.Cells, or nil when x is nil.
func (x *QueryResultRow) GetCells() []*Cell {
	if x == nil {
		return nil
	}
	return x.Cells
}

// GetDmlStats returns x.DmlStats, or nil when x is nil.
func (x *QueryResultRow) GetDmlStats() *DmlStats {
	if x == nil {
		return nil
	}
	return x.DmlStats
}

// GetStatementType returns x.StatementType, or "" when x is nil.
func (x *QueryResultRow) GetStatementType() string {
	if x == nil {
		return ""
	}
	return x.StatementType
}

// GetEmulatorRoute returns x.EmulatorRoute, or "" when x is nil.
func (x *QueryResultRow) GetEmulatorRoute() string {
	if x == nil {
		return ""
	}
	return x.EmulatorRoute
}

// GetPhaseTimings returns x.PhaseTimings, or nil when x is nil.
func (x *QueryResultRow) GetPhaseTimings() *PhaseTimings {
	if x == nil {
		return nil
	}
	return x.PhaseTimings
}
// PhaseTiming pairs a phase name with a duration value (DurationUs).
type PhaseTiming struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Name          string                 `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	DurationUs    int64                  `protobuf:"varint,2,opt,name=duration_us,json=durationUs,proto3" json:"duration_us,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero PhaseTiming and then stores the message
// info from index 49 of file_emulator_proto_msgTypes in its state.
func (x *PhaseTiming) Reset() {
	*x = PhaseTiming{}
	info := &file_emulator_proto_msgTypes[49]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *PhaseTiming) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*PhaseTiming) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *PhaseTiming) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[49]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{49}.
//
// Deprecated: Use PhaseTiming.ProtoReflect.Descriptor instead.
func (*PhaseTiming) Descriptor() ([]byte, []int) {
	path := []int{49}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetName returns x.Name, or "" when x is nil.
func (x *PhaseTiming) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}

// GetDurationUs returns x.DurationUs, or 0 when x is nil.
func (x *PhaseTiming) GetDurationUs() int64 {
	if x == nil {
		return 0
	}
	return x.DurationUs
}
// PhaseTimings wraps a repeated list of PhaseTiming entries.
type PhaseTimings struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Phases        []*PhaseTiming         `protobuf:"bytes,1,rep,name=phases,proto3" json:"phases,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero PhaseTimings and then stores the message
// info from index 50 of file_emulator_proto_msgTypes in its state.
func (x *PhaseTimings) Reset() {
	*x = PhaseTimings{}
	info := &file_emulator_proto_msgTypes[50]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *PhaseTimings) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*PhaseTimings) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *PhaseTimings) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[50]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{50}.
//
// Deprecated: Use PhaseTimings.ProtoReflect.Descriptor instead.
func (*PhaseTimings) Descriptor() ([]byte, []int) {
	path := []int{50}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetPhases returns x.Phases, or nil when x is nil.
func (x *PhaseTimings) GetPhases() []*PhaseTiming {
	if x == nil {
		return nil
	}
	return x.Phases
}
// DmlStats is the engine's report of how many rows a DML statement
// (INSERT / UPDATE / DELETE / MERGE) modified. Mirrors the BigQuery
// REST `Job.statistics.query.dmlStats` shape (see
// docs/bigquery/docs/reference/rest/v2/DmlStats.md): the counts are
// 64-bit because BigQuery exposes them as decimal strings on the
// wire and the gateway formats them with `strconv.FormatInt` from
// these int64 fields.
type DmlStats struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Number of rows added by INSERT / MERGE-INSERT branches.
	InsertedRowCount int64 `protobuf:"varint,1,opt,name=inserted_row_count,json=insertedRowCount,proto3" json:"inserted_row_count,omitempty"`
	// Number of rows updated by UPDATE / MERGE-UPDATE branches.
	UpdatedRowCount int64 `protobuf:"varint,2,opt,name=updated_row_count,json=updatedRowCount,proto3" json:"updated_row_count,omitempty"`
	// Number of rows removed by DELETE / MERGE-DELETE branches.
	DeletedRowCount int64 `protobuf:"varint,3,opt,name=deleted_row_count,json=deletedRowCount,proto3" json:"deleted_row_count,omitempty"`
	unknownFields   protoimpl.UnknownFields
	sizeCache       protoimpl.SizeCache
}

// Reset overwrites x with the zero DmlStats and then stores the message info
// from index 51 of file_emulator_proto_msgTypes in its state.
func (x *DmlStats) Reset() {
	*x = DmlStats{}
	info := &file_emulator_proto_msgTypes[51]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *DmlStats) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DmlStats) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *DmlStats) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[51]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{51}.
//
// Deprecated: Use DmlStats.ProtoReflect.Descriptor instead.
func (*DmlStats) Descriptor() ([]byte, []int) {
	path := []int{51}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetInsertedRowCount returns x.InsertedRowCount, or 0 when x is nil.
func (x *DmlStats) GetInsertedRowCount() int64 {
	if x == nil {
		return 0
	}
	return x.InsertedRowCount
}

// GetUpdatedRowCount returns x.UpdatedRowCount, or 0 when x is nil.
func (x *DmlStats) GetUpdatedRowCount() int64 {
	if x == nil {
		return 0
	}
	return x.UpdatedRowCount
}

// GetDeletedRowCount returns x.DeletedRowCount, or 0 when x is nil.
func (x *DmlStats) GetDeletedRowCount() int64 {
	if x == nil {
		return 0
	}
	return x.DeletedRowCount
}
// Cell is a single value whose payload lives in the Value oneof: a string, a
// null marker, an Array or a Struct.
type Cell struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Wire shape: either `string_value`, `null_value`, `array`, or `struct`
	// is set. Mirrors googlesql::Value's flavor of variant.
	//
	// Types that are valid to be assigned to Value:
	//
	//	*Cell_StringValue
	//	*Cell_NullValue
	//	*Cell_Array
	//	*Cell_StructValue
	Value         isCell_Value `protobuf_oneof:"value"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero Cell and then stores the message info from
// index 52 of file_emulator_proto_msgTypes in its state.
func (x *Cell) Reset() {
	*x = Cell{}
	info := &file_emulator_proto_msgTypes[52]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *Cell) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*Cell) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *Cell) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[52]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{52}.
//
// Deprecated: Use Cell.ProtoReflect.Descriptor instead.
func (*Cell) Descriptor() ([]byte, []int) {
	path := []int{52}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetValue returns the oneof wrapper stored in x.Value, or nil when x is nil.
func (x *Cell) GetValue() isCell_Value {
	if x == nil {
		return nil
	}
	return x.Value
}

// GetStringValue returns the wrapped string when x.Value holds a
// *Cell_StringValue; otherwise (including a nil x) it returns "".
func (x *Cell) GetStringValue() string {
	if x == nil {
		return ""
	}
	if wrapped, ok := x.Value.(*Cell_StringValue); ok {
		return wrapped.StringValue
	}
	return ""
}

// GetNullValue returns the wrapped bool when x.Value holds a *Cell_NullValue;
// otherwise (including a nil x) it returns false.
func (x *Cell) GetNullValue() bool {
	if x == nil {
		return false
	}
	if wrapped, ok := x.Value.(*Cell_NullValue); ok {
		return wrapped.NullValue
	}
	return false
}

// GetArray returns the wrapped *Array when x.Value holds a *Cell_Array;
// otherwise (including a nil x) it returns nil.
func (x *Cell) GetArray() *Array {
	if x == nil {
		return nil
	}
	if wrapped, ok := x.Value.(*Cell_Array); ok {
		return wrapped.Array
	}
	return nil
}

// GetStructValue returns the wrapped *Struct when x.Value holds a
// *Cell_StructValue; otherwise (including a nil x) it returns nil.
func (x *Cell) GetStructValue() *Struct {
	if x == nil {
		return nil
	}
	if wrapped, ok := x.Value.(*Cell_StructValue); ok {
		return wrapped.StructValue
	}
	return nil
}

// isCell_Value is implemented by every wrapper type that may be stored in
// Cell.Value.
type isCell_Value interface {
	isCell_Value()
}

// Cell_StringValue is the Cell.Value wrapper for the string_value field.
type Cell_StringValue struct {
	StringValue string `protobuf:"bytes,1,opt,name=string_value,json=stringValue,proto3,oneof"`
}

// Cell_NullValue is the Cell.Value wrapper for the null_value field.
type Cell_NullValue struct {
	NullValue bool `protobuf:"varint,2,opt,name=null_value,json=nullValue,proto3,oneof"`
}

// Cell_Array is the Cell.Value wrapper for the array field.
type Cell_Array struct {
	Array *Array `protobuf:"bytes,3,opt,name=array,proto3,oneof"`
}

// Cell_StructValue is the Cell.Value wrapper for the struct_value field.
type Cell_StructValue struct {
	StructValue *Struct `protobuf:"bytes,4,opt,name=struct_value,json=structValue,proto3,oneof"`
}

// The empty marker methods below make each wrapper satisfy isCell_Value.

func (*Cell_StringValue) isCell_Value() {}

func (*Cell_NullValue) isCell_Value() {}

func (*Cell_Array) isCell_Value() {}

func (*Cell_StructValue) isCell_Value() {}
// Array wraps a repeated list of Cell elements.
type Array struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Elements      []*Cell                `protobuf:"bytes,1,rep,name=elements,proto3" json:"elements,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero Array and then stores the message info
// from index 53 of file_emulator_proto_msgTypes in its state.
func (x *Array) Reset() {
	*x = Array{}
	info := &file_emulator_proto_msgTypes[53]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *Array) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*Array) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *Array) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[53]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{53}.
//
// Deprecated: Use Array.ProtoReflect.Descriptor instead.
func (*Array) Descriptor() ([]byte, []int) {
	path := []int{53}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetElements returns x.Elements, or nil when x is nil.
func (x *Array) GetElements() []*Cell {
	if x == nil {
		return nil
	}
	return x.Elements
}
// Struct wraps a repeated list of Cell fields.
type Struct struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Fields        []*Cell                `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero Struct and then stores the message info
// from index 54 of file_emulator_proto_msgTypes in its state.
func (x *Struct) Reset() {
	*x = Struct{}
	info := &file_emulator_proto_msgTypes[54]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *Struct) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*Struct) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *Struct) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[54]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{54}.
//
// Deprecated: Use Struct.ProtoReflect.Descriptor instead.
func (*Struct) Descriptor() ([]byte, []int) {
	path := []int{54}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetFields returns x.Fields, or nil when x is nil.
func (x *Struct) GetFields() []*Cell {
	if x == nil {
		return nil
	}
	return x.Fields
}
// SqlDiagnostic carries a diagnostic message and severity along with
// line/column and byte-offset position fields.
type SqlDiagnostic struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Line          int32                  `protobuf:"varint,1,opt,name=line,proto3" json:"line,omitempty"`
	Column        int32                  `protobuf:"varint,2,opt,name=column,proto3" json:"column,omitempty"`
	Message       string                 `protobuf:"bytes,3,opt,name=message,proto3" json:"message,omitempty"`
	Severity      string                 `protobuf:"bytes,4,opt,name=severity,proto3" json:"severity,omitempty"`
	EndLine       int32                  `protobuf:"varint,5,opt,name=end_line,json=endLine,proto3" json:"end_line,omitempty"`
	EndColumn     int32                  `protobuf:"varint,6,opt,name=end_column,json=endColumn,proto3" json:"end_column,omitempty"`
	StartByte     int32                  `protobuf:"varint,7,opt,name=start_byte,json=startByte,proto3" json:"start_byte,omitempty"`
	EndByte       int32                  `protobuf:"varint,8,opt,name=end_byte,json=endByte,proto3" json:"end_byte,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero SqlDiagnostic and then stores the message
// info from index 55 of file_emulator_proto_msgTypes in its state.
func (x *SqlDiagnostic) Reset() {
	*x = SqlDiagnostic{}
	info := &file_emulator_proto_msgTypes[55]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *SqlDiagnostic) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*SqlDiagnostic) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *SqlDiagnostic) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[55]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{55}.
//
// Deprecated: Use SqlDiagnostic.ProtoReflect.Descriptor instead.
func (*SqlDiagnostic) Descriptor() ([]byte, []int) {
	path := []int{55}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetLine returns x.Line, or 0 when x is nil.
func (x *SqlDiagnostic) GetLine() int32 {
	if x == nil {
		return 0
	}
	return x.Line
}

// GetColumn returns x.Column, or 0 when x is nil.
func (x *SqlDiagnostic) GetColumn() int32 {
	if x == nil {
		return 0
	}
	return x.Column
}

// GetMessage returns x.Message, or "" when x is nil.
func (x *SqlDiagnostic) GetMessage() string {
	if x == nil {
		return ""
	}
	return x.Message
}

// GetSeverity returns x.Severity, or "" when x is nil.
func (x *SqlDiagnostic) GetSeverity() string {
	if x == nil {
		return ""
	}
	return x.Severity
}

// GetEndLine returns x.EndLine, or 0 when x is nil.
func (x *SqlDiagnostic) GetEndLine() int32 {
	if x == nil {
		return 0
	}
	return x.EndLine
}

// GetEndColumn returns x.EndColumn, or 0 when x is nil.
func (x *SqlDiagnostic) GetEndColumn() int32 {
	if x == nil {
		return 0
	}
	return x.EndColumn
}

// GetStartByte returns x.StartByte, or 0 when x is nil.
func (x *SqlDiagnostic) GetStartByte() int32 {
	if x == nil {
		return 0
	}
	return x.StartByte
}

// GetEndByte returns x.EndByte, or 0 when x is nil.
func (x *SqlDiagnostic) GetEndByte() int32 {
	if x == nil {
		return 0
	}
	return x.EndByte
}
// FormatSqlRequest carries SQL text together with formatting options: a
// strict flag, a line-length limit and an indentation width.
type FormatSqlRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	Sql   string                 `protobuf:"bytes,1,opt,name=sql,proto3" json:"sql,omitempty"`
	// When true, use strict FormatSql (strips comments). Default is lenient.
	Strict            bool  `protobuf:"varint,2,opt,name=strict,proto3" json:"strict,omitempty"`
	LineLengthLimit   int32 `protobuf:"varint,3,opt,name=line_length_limit,json=lineLengthLimit,proto3" json:"line_length_limit,omitempty"`
	IndentationSpaces int32 `protobuf:"varint,4,opt,name=indentation_spaces,json=indentationSpaces,proto3" json:"indentation_spaces,omitempty"`
	unknownFields     protoimpl.UnknownFields
	sizeCache         protoimpl.SizeCache
}

// Reset overwrites x with the zero FormatSqlRequest and then stores the
// message info from index 56 of file_emulator_proto_msgTypes in its state.
func (x *FormatSqlRequest) Reset() {
	*x = FormatSqlRequest{}
	info := &file_emulator_proto_msgTypes[56]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *FormatSqlRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*FormatSqlRequest) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *FormatSqlRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[56]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{56}.
//
// Deprecated: Use FormatSqlRequest.ProtoReflect.Descriptor instead.
func (*FormatSqlRequest) Descriptor() ([]byte, []int) {
	path := []int{56}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetSql returns x.Sql, or "" when x is nil.
func (x *FormatSqlRequest) GetSql() string {
	if x == nil {
		return ""
	}
	return x.Sql
}

// GetStrict returns x.Strict, or false when x is nil.
func (x *FormatSqlRequest) GetStrict() bool {
	if x == nil {
		return false
	}
	return x.Strict
}

// GetLineLengthLimit returns x.LineLengthLimit, or 0 when x is nil.
func (x *FormatSqlRequest) GetLineLengthLimit() int32 {
	if x == nil {
		return 0
	}
	return x.LineLengthLimit
}

// GetIndentationSpaces returns x.IndentationSpaces, or 0 when x is nil.
func (x *FormatSqlRequest) GetIndentationSpaces() int32 {
	if x == nil {
		return 0
	}
	return x.IndentationSpaces
}
// FormatSqlResponse holds formatted SQL text and a list of diagnostics.
type FormatSqlResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	FormattedSql  string                 `protobuf:"bytes,1,opt,name=formatted_sql,json=formattedSql,proto3" json:"formatted_sql,omitempty"`
	Diagnostics   []*SqlDiagnostic       `protobuf:"bytes,2,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero FormatSqlResponse and then stores the
// message info from index 57 of file_emulator_proto_msgTypes in its state.
func (x *FormatSqlResponse) Reset() {
	*x = FormatSqlResponse{}
	info := &file_emulator_proto_msgTypes[57]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *FormatSqlResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*FormatSqlResponse) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *FormatSqlResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[57]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{57}.
//
// Deprecated: Use FormatSqlResponse.ProtoReflect.Descriptor instead.
func (*FormatSqlResponse) Descriptor() ([]byte, []int) {
	path := []int{57}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetFormattedSql returns x.FormattedSql, or "" when x is nil.
func (x *FormatSqlResponse) GetFormattedSql() string {
	if x == nil {
		return ""
	}
	return x.FormattedSql
}

// GetDiagnostics returns x.Diagnostics, or nil when x is nil.
func (x *FormatSqlResponse) GetDiagnostics() []*SqlDiagnostic {
	if x == nil {
		return nil
	}
	return x.Diagnostics
}
// ParseSqlRequest carries the SQL text of a parse request.
type ParseSqlRequest struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Sql           string                 `protobuf:"bytes,1,opt,name=sql,proto3" json:"sql,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero ParseSqlRequest and then stores the
// message info from index 58 of file_emulator_proto_msgTypes in its state.
func (x *ParseSqlRequest) Reset() {
	*x = ParseSqlRequest{}
	info := &file_emulator_proto_msgTypes[58]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ParseSqlRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ParseSqlRequest) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *ParseSqlRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[58]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{58}.
//
// Deprecated: Use ParseSqlRequest.ProtoReflect.Descriptor instead.
func (*ParseSqlRequest) Descriptor() ([]byte, []int) {
	path := []int{58}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetSql returns x.Sql, or "" when x is nil.
func (x *ParseSqlRequest) GetSql() string {
	if x == nil {
		return ""
	}
	return x.Sql
}
// ParseSqlResponse holds a list of diagnostics and a list of statement-kind
// strings.
type ParseSqlResponse struct {
	state          protoimpl.MessageState `protogen:"open.v1"`
	Diagnostics    []*SqlDiagnostic       `protobuf:"bytes,1,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
	StatementKinds []string               `protobuf:"bytes,2,rep,name=statement_kinds,json=statementKinds,proto3" json:"statement_kinds,omitempty"`
	unknownFields  protoimpl.UnknownFields
	sizeCache      protoimpl.SizeCache
}

// Reset overwrites x with the zero ParseSqlResponse and then stores the
// message info from index 59 of file_emulator_proto_msgTypes in its state.
func (x *ParseSqlResponse) Reset() {
	*x = ParseSqlResponse{}
	info := &file_emulator_proto_msgTypes[59]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ParseSqlResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ParseSqlResponse) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *ParseSqlResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[59]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{59}.
//
// Deprecated: Use ParseSqlResponse.ProtoReflect.Descriptor instead.
func (*ParseSqlResponse) Descriptor() ([]byte, []int) {
	path := []int{59}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetDiagnostics returns x.Diagnostics, or nil when x is nil.
func (x *ParseSqlResponse) GetDiagnostics() []*SqlDiagnostic {
	if x == nil {
		return nil
	}
	return x.Diagnostics
}

// GetStatementKinds returns x.StatementKinds, or nil when x is nil.
func (x *ParseSqlResponse) GetStatementKinds() []string {
	if x == nil {
		return nil
	}
	return x.StatementKinds
}
// TokenizeSqlRequest carries SQL text and an include-comments flag.
type TokenizeSqlRequest struct {
	state           protoimpl.MessageState `protogen:"open.v1"`
	Sql             string                 `protobuf:"bytes,1,opt,name=sql,proto3" json:"sql,omitempty"`
	IncludeComments bool                   `protobuf:"varint,2,opt,name=include_comments,json=includeComments,proto3" json:"include_comments,omitempty"`
	unknownFields   protoimpl.UnknownFields
	sizeCache       protoimpl.SizeCache
}

// Reset overwrites x with the zero TokenizeSqlRequest and then stores the
// message info from index 60 of file_emulator_proto_msgTypes in its state.
func (x *TokenizeSqlRequest) Reset() {
	*x = TokenizeSqlRequest{}
	info := &file_emulator_proto_msgTypes[60]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *TokenizeSqlRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*TokenizeSqlRequest) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *TokenizeSqlRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[60]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{60}.
//
// Deprecated: Use TokenizeSqlRequest.ProtoReflect.Descriptor instead.
func (*TokenizeSqlRequest) Descriptor() ([]byte, []int) {
	path := []int{60}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetSql returns x.Sql, or "" when x is nil.
func (x *TokenizeSqlRequest) GetSql() string {
	if x == nil {
		return ""
	}
	return x.Sql
}

// GetIncludeComments returns x.IncludeComments, or false when x is nil.
func (x *TokenizeSqlRequest) GetIncludeComments() bool {
	if x == nil {
		return false
	}
	return x.IncludeComments
}
// SqlToken describes one token by kind, image and start/end byte fields.
type SqlToken struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Kind          string                 `protobuf:"bytes,1,opt,name=kind,proto3" json:"kind,omitempty"`
	Image         string                 `protobuf:"bytes,2,opt,name=image,proto3" json:"image,omitempty"`
	StartByte     int32                  `protobuf:"varint,3,opt,name=start_byte,json=startByte,proto3" json:"start_byte,omitempty"`
	EndByte       int32                  `protobuf:"varint,4,opt,name=end_byte,json=endByte,proto3" json:"end_byte,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero SqlToken and then stores the message info
// from index 61 of file_emulator_proto_msgTypes in its state.
func (x *SqlToken) Reset() {
	*x = SqlToken{}
	info := &file_emulator_proto_msgTypes[61]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *SqlToken) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*SqlToken) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *SqlToken) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[61]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{61}.
//
// Deprecated: Use SqlToken.ProtoReflect.Descriptor instead.
func (*SqlToken) Descriptor() ([]byte, []int) {
	path := []int{61}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetKind returns x.Kind, or "" when x is nil.
func (x *SqlToken) GetKind() string {
	if x == nil {
		return ""
	}
	return x.Kind
}

// GetImage returns x.Image, or "" when x is nil.
func (x *SqlToken) GetImage() string {
	if x == nil {
		return ""
	}
	return x.Image
}

// GetStartByte returns x.StartByte, or 0 when x is nil.
func (x *SqlToken) GetStartByte() int32 {
	if x == nil {
		return 0
	}
	return x.StartByte
}

// GetEndByte returns x.EndByte, or 0 when x is nil.
func (x *SqlToken) GetEndByte() int32 {
	if x == nil {
		return 0
	}
	return x.EndByte
}
// TokenizeSqlResponse holds a list of tokens and a list of diagnostics.
type TokenizeSqlResponse struct {
	state         protoimpl.MessageState `protogen:"open.v1"`
	Tokens        []*SqlToken            `protobuf:"bytes,1,rep,name=tokens,proto3" json:"tokens,omitempty"`
	Diagnostics   []*SqlDiagnostic       `protobuf:"bytes,2,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero TokenizeSqlResponse and then stores the
// message info from index 62 of file_emulator_proto_msgTypes in its state.
func (x *TokenizeSqlResponse) Reset() {
	*x = TokenizeSqlResponse{}
	info := &file_emulator_proto_msgTypes[62]
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(info)
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *TokenizeSqlResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*TokenizeSqlResponse) ProtoMessage() {}

// ProtoReflect returns a reflective view of x. For a non-nil receiver the
// message state is returned, after storing the message info when none has
// been loaded yet; a nil receiver is passed to MessageOf on the message info.
func (x *TokenizeSqlResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[62]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Descriptor returns the bytes produced by file_emulator_proto_rawDescGZIP
// together with the index path []int{62}.
//
// Deprecated: Use TokenizeSqlResponse.ProtoReflect.Descriptor instead.
func (*TokenizeSqlResponse) Descriptor() ([]byte, []int) {
	path := []int{62}
	return file_emulator_proto_rawDescGZIP(), path
}

// GetTokens returns x.Tokens, or nil when x is nil.
func (x *TokenizeSqlResponse) GetTokens() []*SqlToken {
	if x == nil {
		return nil
	}
	return x.Tokens
}

// GetDiagnostics returns x.Diagnostics, or nil when x is nil.
func (x *TokenizeSqlResponse) GetDiagnostics() []*SqlDiagnostic {
	if x == nil {
		return nil
	}
	return x.Diagnostics
}
type CompleteSqlRequest struct {
state protoimpl.MessageState `protogen:"open.v1"`
ProjectId string `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
DefaultDatasetId string `protobuf:"bytes,2,opt,name=default_dataset_id,json=defaultDatasetId,proto3" json:"default_dataset_id,omitempty"`
Sql string `protobuf:"bytes,3,opt,name=sql,proto3" json:"sql,omitempty"`
CursorByteOffset int32 `protobuf:"varint,4,opt,name=cursor_byte_offset,json=cursorByteOffset,proto3" json:"cursor_byte_offset,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
func (x *CompleteSqlRequest) Reset() {
*x = CompleteSqlRequest{}
mi := &file_emulator_proto_msgTypes[63]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *CompleteSqlRequest) String() string {
	rendered := protoimpl.X.MessageStringOf(x)
	return rendered
}
// ProtoMessage is an empty marker method on *CompleteSqlRequest; it has no body and no effect.
func (*CompleteSqlRequest) ProtoMessage() {}
// ProtoReflect returns the protoreflect.Message view of x.
//
// For a nil receiver the view is built from the message info in slot 63 of
// file_emulator_proto_msgTypes. Otherwise the message state of x is returned,
// after storing that message info in it if none was loaded yet.
func (x *CompleteSqlRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[63]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the bytes from file_emulator_proto_rawDescGZIP together
// with the index path []int{63}.
//
// Deprecated: Use CompleteSqlRequest.ProtoReflect.Descriptor instead.
func (*CompleteSqlRequest) Descriptor() ([]byte, []int) {
	gz := file_emulator_proto_rawDescGZIP()
	return gz, []int{63}
}
// GetProjectId returns x.ProjectId, or "" when the receiver is nil.
func (x *CompleteSqlRequest) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}
// GetDefaultDatasetId returns x.DefaultDatasetId, or "" when the receiver is nil.
func (x *CompleteSqlRequest) GetDefaultDatasetId() string {
	if x == nil {
		return ""
	}
	return x.DefaultDatasetId
}
// GetSql returns x.Sql, or "" when the receiver is nil.
func (x *CompleteSqlRequest) GetSql() string {
	if x == nil {
		return ""
	}
	return x.Sql
}
// GetCursorByteOffset returns x.CursorByteOffset, or 0 when the receiver is nil.
func (x *CompleteSqlRequest) GetCursorByteOffset() int32 {
	if x == nil {
		return 0
	}
	return x.CursorByteOffset
}
// SqlCompletionCandidate is the Go type for
// bigquery_emulator.v1.SqlCompletionCandidate (entry 64 of
// file_emulator_proto_goTypes). It is the element type of
// CompleteSqlResponse.Candidates.
type SqlCompletionCandidate struct {
// state, unknownFields and sizeCache have protoimpl-declared types;
// presumably runtime bookkeeping — see the protoimpl documentation.
state protoimpl.MessageState `protogen:"open.v1"`
// Label is proto field 1 (label), a string.
Label string `protobuf:"bytes,1,opt,name=label,proto3" json:"label,omitempty"`
// Kind is proto field 2 (kind), a string.
Kind string `protobuf:"bytes,2,opt,name=kind,proto3" json:"kind,omitempty"`
// InsertText is proto field 3 (insert_text), a string.
InsertText string `protobuf:"bytes,3,opt,name=insert_text,json=insertText,proto3" json:"insert_text,omitempty"`
// Detail is proto field 4 (detail), a string.
Detail string `protobuf:"bytes,4,opt,name=detail,proto3" json:"detail,omitempty"`
// Fqn is proto field 5 (fqn), a string.
// NOTE(review): presumably a fully qualified name — confirm against emulator.proto.
Fqn string `protobuf:"bytes,5,opt,name=fqn,proto3" json:"fqn,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with the zero SqlCompletionCandidate and then stores the
// message info for slot 64 of file_emulator_proto_msgTypes in its message state.
func (x *SqlCompletionCandidate) Reset() {
	*x = SqlCompletionCandidate{}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(&file_emulator_proto_msgTypes[64])
}
// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SqlCompletionCandidate) String() string {
	rendered := protoimpl.X.MessageStringOf(x)
	return rendered
}
// ProtoMessage is an empty marker method on *SqlCompletionCandidate; it has no body and no effect.
func (*SqlCompletionCandidate) ProtoMessage() {}
// ProtoReflect returns the protoreflect.Message view of x.
//
// For a nil receiver the view is built from the message info in slot 64 of
// file_emulator_proto_msgTypes. Otherwise the message state of x is returned,
// after storing that message info in it if none was loaded yet.
func (x *SqlCompletionCandidate) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[64]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the bytes from file_emulator_proto_rawDescGZIP together
// with the index path []int{64}.
//
// Deprecated: Use SqlCompletionCandidate.ProtoReflect.Descriptor instead.
func (*SqlCompletionCandidate) Descriptor() ([]byte, []int) {
	gz := file_emulator_proto_rawDescGZIP()
	return gz, []int{64}
}
// GetLabel returns x.Label, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetLabel() string {
	if x == nil {
		return ""
	}
	return x.Label
}
// GetKind returns x.Kind, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetKind() string {
	if x == nil {
		return ""
	}
	return x.Kind
}
// GetInsertText returns x.InsertText, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetInsertText() string {
	if x == nil {
		return ""
	}
	return x.InsertText
}
// GetDetail returns x.Detail, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetDetail() string {
	if x == nil {
		return ""
	}
	return x.Detail
}
// GetFqn returns x.Fqn, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetFqn() string {
	if x == nil {
		return ""
	}
	return x.Fqn
}
// CompleteSqlResponse is the Go type for
// bigquery_emulator.v1.CompleteSqlResponse (entry 65 of
// file_emulator_proto_goTypes). The embedded descriptor lists it as the output
// of the SqlTools.Complete RPC.
type CompleteSqlResponse struct {
// state, unknownFields and sizeCache have protoimpl-declared types;
// presumably runtime bookkeeping — see the protoimpl documentation.
state protoimpl.MessageState `protogen:"open.v1"`
// Candidates is proto field 1 (candidates), a repeated SqlCompletionCandidate.
Candidates []*SqlCompletionCandidate `protobuf:"bytes,1,rep,name=candidates,proto3" json:"candidates,omitempty"`
// ReplacementStart is proto field 2 (replacement_start), an int32.
// NOTE(review): units and inclusivity are not visible here — confirm against emulator.proto.
ReplacementStart int32 `protobuf:"varint,2,opt,name=replacement_start,json=replacementStart,proto3" json:"replacement_start,omitempty"`
// ReplacementEnd is proto field 3 (replacement_end), an int32.
// NOTE(review): units and inclusivity are not visible here — confirm against emulator.proto.
ReplacementEnd int32 `protobuf:"varint,3,opt,name=replacement_end,json=replacementEnd,proto3" json:"replacement_end,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with the zero CompleteSqlResponse and then stores the
// message info for slot 65 of file_emulator_proto_msgTypes in its message state.
func (x *CompleteSqlResponse) Reset() {
	*x = CompleteSqlResponse{}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(&file_emulator_proto_msgTypes[65])
}
// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *CompleteSqlResponse) String() string {
	rendered := protoimpl.X.MessageStringOf(x)
	return rendered
}
// ProtoMessage is an empty marker method on *CompleteSqlResponse; it has no body and no effect.
func (*CompleteSqlResponse) ProtoMessage() {}
// ProtoReflect returns the protoreflect.Message view of x.
//
// For a nil receiver the view is built from the message info in slot 65 of
// file_emulator_proto_msgTypes. Otherwise the message state of x is returned,
// after storing that message info in it if none was loaded yet.
func (x *CompleteSqlResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[65]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the bytes from file_emulator_proto_rawDescGZIP together
// with the index path []int{65}.
//
// Deprecated: Use CompleteSqlResponse.ProtoReflect.Descriptor instead.
func (*CompleteSqlResponse) Descriptor() ([]byte, []int) {
	gz := file_emulator_proto_rawDescGZIP()
	return gz, []int{65}
}
// GetCandidates returns x.Candidates, or nil when the receiver is nil.
func (x *CompleteSqlResponse) GetCandidates() []*SqlCompletionCandidate {
	if x == nil {
		return nil
	}
	return x.Candidates
}
// GetReplacementStart returns x.ReplacementStart, or 0 when the receiver is nil.
func (x *CompleteSqlResponse) GetReplacementStart() int32 {
	if x == nil {
		return 0
	}
	return x.ReplacementStart
}
// GetReplacementEnd returns x.ReplacementEnd, or 0 when the receiver is nil.
func (x *CompleteSqlResponse) GetReplacementEnd() int32 {
	if x == nil {
		return 0
	}
	return x.ReplacementEnd
}
// AnalyzeSqlRequest is the Go type for bigquery_emulator.v1.AnalyzeSqlRequest
// (entry 66 of file_emulator_proto_goTypes). The embedded descriptor lists it
// as the input of the SqlTools.Analyze RPC.
type AnalyzeSqlRequest struct {
// state, unknownFields and sizeCache have protoimpl-declared types;
// presumably runtime bookkeeping — see the protoimpl documentation.
state protoimpl.MessageState `protogen:"open.v1"`
// ProjectId is proto field 1 (project_id), a string.
ProjectId string `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
// DefaultDatasetId is proto field 2 (default_dataset_id), a string.
DefaultDatasetId string `protobuf:"bytes,2,opt,name=default_dataset_id,json=defaultDatasetId,proto3" json:"default_dataset_id,omitempty"`
// Sql is proto field 3 (sql), a string.
Sql string `protobuf:"bytes,3,opt,name=sql,proto3" json:"sql,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with the zero AnalyzeSqlRequest and then stores the
// message info for slot 66 of file_emulator_proto_msgTypes in its message state.
func (x *AnalyzeSqlRequest) Reset() {
	*x = AnalyzeSqlRequest{}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(&file_emulator_proto_msgTypes[66])
}
// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *AnalyzeSqlRequest) String() string {
	rendered := protoimpl.X.MessageStringOf(x)
	return rendered
}
// ProtoMessage is an empty marker method on *AnalyzeSqlRequest; it has no body and no effect.
func (*AnalyzeSqlRequest) ProtoMessage() {}
// ProtoReflect returns the protoreflect.Message view of x.
//
// For a nil receiver the view is built from the message info in slot 66 of
// file_emulator_proto_msgTypes. Otherwise the message state of x is returned,
// after storing that message info in it if none was loaded yet.
func (x *AnalyzeSqlRequest) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[66]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the bytes from file_emulator_proto_rawDescGZIP together
// with the index path []int{66}.
//
// Deprecated: Use AnalyzeSqlRequest.ProtoReflect.Descriptor instead.
func (*AnalyzeSqlRequest) Descriptor() ([]byte, []int) {
	gz := file_emulator_proto_rawDescGZIP()
	return gz, []int{66}
}
// GetProjectId returns x.ProjectId, or "" when the receiver is nil.
func (x *AnalyzeSqlRequest) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}
// GetDefaultDatasetId returns x.DefaultDatasetId, or "" when the receiver is nil.
func (x *AnalyzeSqlRequest) GetDefaultDatasetId() string {
	if x == nil {
		return ""
	}
	return x.DefaultDatasetId
}
// GetSql returns x.Sql, or "" when the receiver is nil.
func (x *AnalyzeSqlRequest) GetSql() string {
	if x == nil {
		return ""
	}
	return x.Sql
}
// ReferencedTable is the Go type for bigquery_emulator.v1.ReferencedTable
// (entry 67 of file_emulator_proto_goTypes). It is the element type of
// AnalyzeSqlResponse.ReferencedTables.
type ReferencedTable struct {
// state, unknownFields and sizeCache have protoimpl-declared types;
// presumably runtime bookkeeping — see the protoimpl documentation.
state protoimpl.MessageState `protogen:"open.v1"`
// ProjectId is proto field 1 (project_id), a string.
ProjectId string `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
// DatasetId is proto field 2 (dataset_id), a string.
DatasetId string `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
// TableId is proto field 3 (table_id), a string.
TableId string `protobuf:"bytes,3,opt,name=table_id,json=tableId,proto3" json:"table_id,omitempty"`
// Alias is proto field 4 (alias), a string.
Alias string `protobuf:"bytes,4,opt,name=alias,proto3" json:"alias,omitempty"`
// Kind is proto field 5 (kind), a string.
// NOTE(review): the set of allowed values is not visible here — confirm against emulator.proto.
Kind string `protobuf:"bytes,5,opt,name=kind,proto3" json:"kind,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with the zero ReferencedTable and then stores the
// message info for slot 67 of file_emulator_proto_msgTypes in its message state.
func (x *ReferencedTable) Reset() {
	*x = ReferencedTable{}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(&file_emulator_proto_msgTypes[67])
}
// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ReferencedTable) String() string {
	rendered := protoimpl.X.MessageStringOf(x)
	return rendered
}
// ProtoMessage is an empty marker method on *ReferencedTable; it has no body and no effect.
func (*ReferencedTable) ProtoMessage() {}
// ProtoReflect returns the protoreflect.Message view of x.
//
// For a nil receiver the view is built from the message info in slot 67 of
// file_emulator_proto_msgTypes. Otherwise the message state of x is returned,
// after storing that message info in it if none was loaded yet.
func (x *ReferencedTable) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[67]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the bytes from file_emulator_proto_rawDescGZIP together
// with the index path []int{67}.
//
// Deprecated: Use ReferencedTable.ProtoReflect.Descriptor instead.
func (*ReferencedTable) Descriptor() ([]byte, []int) {
	gz := file_emulator_proto_rawDescGZIP()
	return gz, []int{67}
}
// GetProjectId returns x.ProjectId, or "" when the receiver is nil.
func (x *ReferencedTable) GetProjectId() string {
	if x == nil {
		return ""
	}
	return x.ProjectId
}
// GetDatasetId returns x.DatasetId, or "" when the receiver is nil.
func (x *ReferencedTable) GetDatasetId() string {
	if x == nil {
		return ""
	}
	return x.DatasetId
}
// GetTableId returns x.TableId, or "" when the receiver is nil.
func (x *ReferencedTable) GetTableId() string {
	if x == nil {
		return ""
	}
	return x.TableId
}
// GetAlias returns x.Alias, or "" when the receiver is nil.
func (x *ReferencedTable) GetAlias() string {
	if x == nil {
		return ""
	}
	return x.Alias
}
// GetKind returns x.Kind, or "" when the receiver is nil.
func (x *ReferencedTable) GetKind() string {
	if x == nil {
		return ""
	}
	return x.Kind
}
// AnalyzeSqlResponse is the Go type for bigquery_emulator.v1.AnalyzeSqlResponse
// (entry 68 of file_emulator_proto_goTypes). The embedded descriptor lists it
// as the output of the SqlTools.Analyze RPC.
type AnalyzeSqlResponse struct {
// state, unknownFields and sizeCache have protoimpl-declared types;
// presumably runtime bookkeeping — see the protoimpl documentation.
state protoimpl.MessageState `protogen:"open.v1"`
// ReferencedTables is proto field 1 (referenced_tables), a repeated ReferencedTable.
ReferencedTables []*ReferencedTable `protobuf:"bytes,1,rep,name=referenced_tables,json=referencedTables,proto3" json:"referenced_tables,omitempty"`
// StatementKinds is proto field 2 (statement_kinds), a repeated string.
StatementKinds []string `protobuf:"bytes,2,rep,name=statement_kinds,json=statementKinds,proto3" json:"statement_kinds,omitempty"`
// Diagnostics is proto field 3 (diagnostics), a repeated SqlDiagnostic.
Diagnostics []*SqlDiagnostic `protobuf:"bytes,3,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with the zero AnalyzeSqlResponse and then stores the
// message info for slot 68 of file_emulator_proto_msgTypes in its message state.
func (x *AnalyzeSqlResponse) Reset() {
	*x = AnalyzeSqlResponse{}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	state.StoreMessageInfo(&file_emulator_proto_msgTypes[68])
}
// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *AnalyzeSqlResponse) String() string {
	rendered := protoimpl.X.MessageStringOf(x)
	return rendered
}
// ProtoMessage is an empty marker method on *AnalyzeSqlResponse; it has no body and no effect.
func (*AnalyzeSqlResponse) ProtoMessage() {}
// ProtoReflect returns the protoreflect.Message view of x.
//
// For a nil receiver the view is built from the message info in slot 68 of
// file_emulator_proto_msgTypes. Otherwise the message state of x is returned,
// after storing that message info in it if none was loaded yet.
func (x *AnalyzeSqlResponse) ProtoReflect() protoreflect.Message {
	info := &file_emulator_proto_msgTypes[68]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the bytes from file_emulator_proto_rawDescGZIP together
// with the index path []int{68}.
//
// Deprecated: Use AnalyzeSqlResponse.ProtoReflect.Descriptor instead.
func (*AnalyzeSqlResponse) Descriptor() ([]byte, []int) {
	gz := file_emulator_proto_rawDescGZIP()
	return gz, []int{68}
}
// GetReferencedTables returns x.ReferencedTables, or nil when the receiver is nil.
func (x *AnalyzeSqlResponse) GetReferencedTables() []*ReferencedTable {
	if x == nil {
		return nil
	}
	return x.ReferencedTables
}
// GetStatementKinds returns x.StatementKinds, or nil when the receiver is nil.
func (x *AnalyzeSqlResponse) GetStatementKinds() []string {
	if x == nil {
		return nil
	}
	return x.StatementKinds
}
// GetDiagnostics returns x.Diagnostics, or nil when the receiver is nil.
func (x *AnalyzeSqlResponse) GetDiagnostics() []*SqlDiagnostic {
	if x == nil {
		return nil
	}
	return x.Diagnostics
}
// File_emulator_proto is the exported protoreflect.FileDescriptor variable for
// emulator.proto. It is declared without an initializer here.
// NOTE(review): presumably assigned during package initialization — confirm in the file's init code.
var File_emulator_proto protoreflect.FileDescriptor
const file_emulator_proto_rawDesc = "" +
"\n" +
"\x0eemulator.proto\x12\x14bigquery_emulator.v1\"J\n" +
"\n" +
"DatasetRef\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
"\n" +
"dataset_id\x18\x02 \x01(\tR\tdatasetId\"\x82\x01\n" +
"\bTableRef\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
"\n" +
"dataset_id\x18\x02 \x01(\tR\tdatasetId\x12\x19\n" +
"\btable_id\x18\x03 \x01(\tR\atableId\x12\x1d\n" +
"\n" +
"table_type\x18\x04 \x01(\tR\ttableType\"\xa6\x01\n" +
"\vFieldSchema\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12\x12\n" +
"\x04type\x18\x02 \x01(\tR\x04type\x12\x12\n" +
"\x04mode\x18\x03 \x01(\tR\x04mode\x12 \n" +
"\vdescription\x18\x04 \x01(\tR\vdescription\x129\n" +
"\x06fields\x18\x05 \x03(\v2!.bigquery_emulator.v1.FieldSchemaR\x06fields\"H\n" +
"\vTableSchema\x129\n" +
"\x06fields\x18\x01 \x03(\v2!.bigquery_emulator.v1.FieldSchemaR\x06fields\"p\n" +
"\x16RegisterDatasetRequest\x12:\n" +
"\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\x12\x1a\n" +
"\blocation\x18\x02 \x01(\tR\blocation\"\x19\n" +
"\x17RegisterDatasetResponse\"\xa7\x01\n" +
"\x12DropDatasetRequest\x12:\n" +
"\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\x12'\n" +
"\x0fdelete_contents\x18\x02 \x01(\bR\x0edeleteContents\x12,\n" +
"\x12rest_metadata_json\x18\x03 \x01(\tR\x10restMetadataJson\"\x15\n" +
"\x13DropDatasetResponse\"T\n" +
"\x16UndeleteDatasetRequest\x12:\n" +
"\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\"G\n" +
"\x17UndeleteDatasetResponse\x12,\n" +
"\x12rest_metadata_json\x18\x01 \x01(\tR\x10restMetadataJson\"4\n" +
"\x13ListDatasetsRequest\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\"T\n" +
"\x14ListDatasetsResponse\x12<\n" +
"\bdatasets\x18\x01 \x03(\v2 .bigquery_emulator.v1.DatasetRefR\bdatasets\"\x87\x01\n" +
"\x14RegisterTableRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x129\n" +
"\x06schema\x18\x02 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\"\x17\n" +
"\x15RegisterTableResponse\"H\n" +
"\x10DropTableRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\"\x13\n" +
"\x11DropTableResponse\"O\n" +
"\x11ListTablesRequest\x12:\n" +
"\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\"L\n" +
"\x12ListTablesResponse\x126\n" +
"\x06tables\x18\x01 \x03(\v2\x1e.bigquery_emulator.v1.TableRefR\x06tables\"L\n" +
"\x14DescribeTableRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\"\xbf\x01\n" +
"\x15DescribeTableResponse\x129\n" +
"\x06schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12\x1d\n" +
"\n" +
"table_type\x18\x02 \x01(\tR\ttableType\x12\x1d\n" +
"\n" +
"view_query\x18\x03 \x01(\tR\tviewQuery\x12-\n" +
"\x13view_use_legacy_sql\x18\x04 \x01(\bR\x10viewUseLegacySql\";\n" +
"\aDataRow\x120\n" +
"\x05cells\x18\x01 \x03(\v2\x1a.bigquery_emulator.v1.CellR\x05cells\"|\n" +
"\x11InsertRowsRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x121\n" +
"\x04rows\x18\x02 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\"\x14\n" +
"\x12InsertRowsResponse\"\x89\x01\n" +
"\x0fListRowsRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12\x1f\n" +
"\vstart_index\x18\x02 \x01(\x03R\n" +
"startIndex\x12\x1f\n" +
"\vmax_results\x18\x03 \x01(\x03R\n" +
"maxResults\"i\n" +
"\n" +
"RoutineRef\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
"\n" +
"dataset_id\x18\x02 \x01(\tR\tdatasetId\x12\x1d\n" +
"\n" +
"routine_id\x18\x03 \x01(\tR\troutineId\"\xf7\x01\n" +
"\x11RoutineDescriptor\x12:\n" +
"\aroutine\x18\x01 \x01(\v2 .bigquery_emulator.v1.RoutineRefR\aroutine\x12!\n" +
"\froutine_type\x18\x02 \x01(\tR\vroutineType\x12\x1a\n" +
"\blanguage\x18\x03 \x01(\tR\blanguage\x12'\n" +
"\x0fdefinition_body\x18\x04 \x01(\tR\x0edefinitionBody\x12\x17\n" +
"\addl_sql\x18\x05 \x01(\tR\x06ddlSql\x12%\n" +
"\x0esignature_json\x18\x06 \x01(\tR\rsignatureJson\"Q\n" +
"\x13ListRoutinesRequest\x12:\n" +
"\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\"[\n" +
"\x14ListRoutinesResponse\x12C\n" +
"\broutines\x18\x01 \x03(\v2'.bigquery_emulator.v1.RoutineDescriptorR\broutines\"O\n" +
"\x11GetRoutineRequest\x12:\n" +
"\aroutine\x18\x01 \x01(\v2 .bigquery_emulator.v1.RoutineRefR\aroutine\"W\n" +
"\x12GetRoutineResponse\x12A\n" +
"\aroutine\x18\x01 \x01(\v2'.bigquery_emulator.v1.RoutineDescriptorR\aroutine\"Y\n" +
"\x14UpsertRoutineRequest\x12A\n" +
"\aroutine\x18\x01 \x01(\v2'.bigquery_emulator.v1.RoutineDescriptorR\aroutine\"\x17\n" +
"\x15UpsertRoutineResponse\"R\n" +
"\x14DeleteRoutineRequest\x12:\n" +
"\aroutine\x18\x01 \x01(\v2 .bigquery_emulator.v1.RoutineRefR\aroutine\"\x17\n" +
"\x15DeleteRoutineResponse\"\x88\x02\n" +
"\x0fRowAccessPolicy\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12\x1b\n" +
"\tpolicy_id\x18\x02 \x01(\tR\bpolicyId\x12)\n" +
"\x10filter_predicate\x18\x03 \x01(\tR\x0ffilterPredicate\x12\x1a\n" +
"\bgrantees\x18\x04 \x03(\tR\bgrantees\x12(\n" +
"\x10creation_time_ms\x18\x05 \x01(\x03R\x0ecreationTimeMs\x121\n" +
"\x15last_modified_time_ms\x18\x06 \x01(\x03R\x12lastModifiedTimeMs\"]\n" +
"\x1cUpsertRowAccessPolicyRequest\x12=\n" +
"\x06policy\x18\x01 \x01(\v2%.bigquery_emulator.v1.RowAccessPolicyR\x06policy\"^\n" +
"\x1dUpsertRowAccessPolicyResponse\x12=\n" +
"\x06policy\x18\x01 \x01(\v2%.bigquery_emulator.v1.RowAccessPolicyR\x06policy\"q\n" +
"\x1cDeleteRowAccessPolicyRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12\x1b\n" +
"\tpolicy_id\x18\x02 \x01(\tR\bpolicyId\"\x1f\n" +
"\x1dDeleteRowAccessPolicyResponse\"T\n" +
"\x1cListRowAccessPoliciesRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\"b\n" +
"\x1dListRowAccessPoliciesResponse\x12A\n" +
"\bpolicies\x18\x01 \x03(\v2%.bigquery_emulator.v1.RowAccessPolicyR\bpolicies\"\xc4\x01\n" +
"\x10ColumnGovernance\x12\x1f\n" +
"\vcolumn_name\x18\x01 \x01(\tR\n" +
"columnName\x12\x1f\n" +
"\vpolicy_tags\x18\x02 \x03(\tR\n" +
"policyTags\x12\x1b\n" +
"\tmask_kind\x18\x03 \x01(\tR\bmaskKind\x12#\n" +
"\rmask_grantees\x18\x04 \x03(\tR\fmaskGrantees\x12,\n" +
"\x12default_mask_value\x18\x05 \x01(\tR\x10defaultMaskValue\"\x92\x01\n" +
"\x1aSetColumnGovernanceRequest\x124\n" +
"\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12>\n" +
"\x06column\x18\x02 \x01(\v2&.bigquery_emulator.v1.ColumnGovernanceR\x06column\"\x1d\n" +
"\x1bSetColumnGovernanceResponse\"\x8e\x01\n" +
"\x10ListRowsResponse\x121\n" +
"\x04rows\x18\x01 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\x12\x1d\n" +
"\n" +
"total_rows\x18\x02 \x01(\x03R\ttotalRows\x12(\n" +
"\x10next_start_index\x18\x03 \x01(\x03R\x0enextStartIndex\"\xf5\x02\n" +
"\fQueryRequest\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\x12,\n" +
"\x12default_dataset_id\x18\x02 \x01(\tR\x10defaultDatasetId\x12\x10\n" +
"\x03sql\x18\x03 \x01(\tR\x03sql\x12R\n" +
"\n" +
"parameters\x18\x04 \x03(\v22.bigquery_emulator.v1.QueryRequest.ParametersEntryR\n" +
"parameters\x12$\n" +
"\x0euse_legacy_sql\x18\x05 \x01(\bR\fuseLegacySql\x12'\n" +
"\x0fprincipal_email\x18\x06 \x01(\tR\x0eprincipalEmail\x1ac\n" +
"\x0fParametersEntry\x12\x10\n" +
"\x03key\x18\x01 \x01(\tR\x03key\x12:\n" +
"\x05value\x18\x02 \x01(\v2$.bigquery_emulator.v1.QueryParameterR\x05value:\x028\x01\"i\n" +
"\x0eQueryParameter\x12\x1b\n" +
"\ttype_kind\x18\x01 \x01(\tR\btypeKind\x12\x1d\n" +
"\n" +
"value_json\x18\x02 \x01(\tR\tvalueJson\x12\x1b\n" +
"\ttype_json\x18\x03 \x01(\tR\btypeJson\"\x87\x01\n" +
"\x0eDryRunResponse\x129\n" +
"\x06schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12:\n" +
"\x19estimated_bytes_processed\x18\x02 \x01(\x03R\x17estimatedBytesProcessed\"\xd1\x02\n" +
"\x0eQueryResultRow\x129\n" +
"\x06schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x120\n" +
"\x05cells\x18\x02 \x03(\v2\x1a.bigquery_emulator.v1.CellR\x05cells\x12;\n" +
"\tdml_stats\x18\x03 \x01(\v2\x1e.bigquery_emulator.v1.DmlStatsR\bdmlStats\x12%\n" +
"\x0estatement_type\x18\x04 \x01(\tR\rstatementType\x12%\n" +
"\x0eemulator_route\x18\x05 \x01(\tR\remulatorRoute\x12G\n" +
"\rphase_timings\x18\x06 \x01(\v2\".bigquery_emulator.v1.PhaseTimingsR\fphaseTimings\"B\n" +
"\vPhaseTiming\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n" +
"\vduration_us\x18\x02 \x01(\x03R\n" +
"durationUs\"I\n" +
"\fPhaseTimings\x129\n" +
"\x06phases\x18\x01 \x03(\v2!.bigquery_emulator.v1.PhaseTimingR\x06phases\"\x90\x01\n" +
"\bDmlStats\x12,\n" +
"\x12inserted_row_count\x18\x01 \x01(\x03R\x10insertedRowCount\x12*\n" +
"\x11updated_row_count\x18\x02 \x01(\x03R\x0fupdatedRowCount\x12*\n" +
"\x11deleted_row_count\x18\x03 \x01(\x03R\x0fdeletedRowCount\"\xcd\x01\n" +
"\x04Cell\x12#\n" +
"\fstring_value\x18\x01 \x01(\tH\x00R\vstringValue\x12\x1f\n" +
"\n" +
"null_value\x18\x02 \x01(\bH\x00R\tnullValue\x123\n" +
"\x05array\x18\x03 \x01(\v2\x1b.bigquery_emulator.v1.ArrayH\x00R\x05array\x12A\n" +
"\fstruct_value\x18\x04 \x01(\v2\x1c.bigquery_emulator.v1.StructH\x00R\vstructValueB\a\n" +
"\x05value\"?\n" +
"\x05Array\x126\n" +
"\belements\x18\x01 \x03(\v2\x1a.bigquery_emulator.v1.CellR\belements\"<\n" +
"\x06Struct\x122\n" +
"\x06fields\x18\x01 \x03(\v2\x1a.bigquery_emulator.v1.CellR\x06fields\"\xe5\x01\n" +
"\rSqlDiagnostic\x12\x12\n" +
"\x04line\x18\x01 \x01(\x05R\x04line\x12\x16\n" +
"\x06column\x18\x02 \x01(\x05R\x06column\x12\x18\n" +
"\amessage\x18\x03 \x01(\tR\amessage\x12\x1a\n" +
"\bseverity\x18\x04 \x01(\tR\bseverity\x12\x19\n" +
"\bend_line\x18\x05 \x01(\x05R\aendLine\x12\x1d\n" +
"\n" +
"end_column\x18\x06 \x01(\x05R\tendColumn\x12\x1d\n" +
"\n" +
"start_byte\x18\a \x01(\x05R\tstartByte\x12\x19\n" +
"\bend_byte\x18\b \x01(\x05R\aendByte\"\x97\x01\n" +
"\x10FormatSqlRequest\x12\x10\n" +
"\x03sql\x18\x01 \x01(\tR\x03sql\x12\x16\n" +
"\x06strict\x18\x02 \x01(\bR\x06strict\x12*\n" +
"\x11line_length_limit\x18\x03 \x01(\x05R\x0flineLengthLimit\x12-\n" +
"\x12indentation_spaces\x18\x04 \x01(\x05R\x11indentationSpaces\"\x7f\n" +
"\x11FormatSqlResponse\x12#\n" +
"\rformatted_sql\x18\x01 \x01(\tR\fformattedSql\x12E\n" +
"\vdiagnostics\x18\x02 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics\"#\n" +
"\x0fParseSqlRequest\x12\x10\n" +
"\x03sql\x18\x01 \x01(\tR\x03sql\"\x82\x01\n" +
"\x10ParseSqlResponse\x12E\n" +
"\vdiagnostics\x18\x01 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics\x12'\n" +
"\x0fstatement_kinds\x18\x02 \x03(\tR\x0estatementKinds\"Q\n" +
"\x12TokenizeSqlRequest\x12\x10\n" +
"\x03sql\x18\x01 \x01(\tR\x03sql\x12)\n" +
"\x10include_comments\x18\x02 \x01(\bR\x0fincludeComments\"n\n" +
"\bSqlToken\x12\x12\n" +
"\x04kind\x18\x01 \x01(\tR\x04kind\x12\x14\n" +
"\x05image\x18\x02 \x01(\tR\x05image\x12\x1d\n" +
"\n" +
"start_byte\x18\x03 \x01(\x05R\tstartByte\x12\x19\n" +
"\bend_byte\x18\x04 \x01(\x05R\aendByte\"\x94\x01\n" +
"\x13TokenizeSqlResponse\x126\n" +
"\x06tokens\x18\x01 \x03(\v2\x1e.bigquery_emulator.v1.SqlTokenR\x06tokens\x12E\n" +
"\vdiagnostics\x18\x02 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics\"\xa1\x01\n" +
"\x12CompleteSqlRequest\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\x12,\n" +
"\x12default_dataset_id\x18\x02 \x01(\tR\x10defaultDatasetId\x12\x10\n" +
"\x03sql\x18\x03 \x01(\tR\x03sql\x12,\n" +
"\x12cursor_byte_offset\x18\x04 \x01(\x05R\x10cursorByteOffset\"\x8d\x01\n" +
"\x16SqlCompletionCandidate\x12\x14\n" +
"\x05label\x18\x01 \x01(\tR\x05label\x12\x12\n" +
"\x04kind\x18\x02 \x01(\tR\x04kind\x12\x1f\n" +
"\vinsert_text\x18\x03 \x01(\tR\n" +
"insertText\x12\x16\n" +
"\x06detail\x18\x04 \x01(\tR\x06detail\x12\x10\n" +
"\x03fqn\x18\x05 \x01(\tR\x03fqn\"\xb9\x01\n" +
"\x13CompleteSqlResponse\x12L\n" +
"\n" +
"candidates\x18\x01 \x03(\v2,.bigquery_emulator.v1.SqlCompletionCandidateR\n" +
"candidates\x12+\n" +
"\x11replacement_start\x18\x02 \x01(\x05R\x10replacementStart\x12'\n" +
"\x0freplacement_end\x18\x03 \x01(\x05R\x0ereplacementEnd\"r\n" +
"\x11AnalyzeSqlRequest\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\x12,\n" +
"\x12default_dataset_id\x18\x02 \x01(\tR\x10defaultDatasetId\x12\x10\n" +
"\x03sql\x18\x03 \x01(\tR\x03sql\"\x94\x01\n" +
"\x0fReferencedTable\x12\x1d\n" +
"\n" +
"project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
"\n" +
"dataset_id\x18\x02 \x01(\tR\tdatasetId\x12\x19\n" +
"\btable_id\x18\x03 \x01(\tR\atableId\x12\x14\n" +
"\x05alias\x18\x04 \x01(\tR\x05alias\x12\x12\n" +
"\x04kind\x18\x05 \x01(\tR\x04kind\"\xd8\x01\n" +
"\x12AnalyzeSqlResponse\x12R\n" +
"\x11referenced_tables\x18\x01 \x03(\v2%.bigquery_emulator.v1.ReferencedTableR\x10referencedTables\x12'\n" +
"\x0fstatement_kinds\x18\x02 \x03(\tR\x0estatementKinds\x12E\n" +
"\vdiagnostics\x18\x03 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics2\xa4\x0f\n" +
"\aCatalog\x12n\n" +
"\x0fRegisterDataset\x12,.bigquery_emulator.v1.RegisterDatasetRequest\x1a-.bigquery_emulator.v1.RegisterDatasetResponse\x12b\n" +
"\vDropDataset\x12(.bigquery_emulator.v1.DropDatasetRequest\x1a).bigquery_emulator.v1.DropDatasetResponse\x12n\n" +
"\x0fUndeleteDataset\x12,.bigquery_emulator.v1.UndeleteDatasetRequest\x1a-.bigquery_emulator.v1.UndeleteDatasetResponse\x12e\n" +
"\fListDatasets\x12).bigquery_emulator.v1.ListDatasetsRequest\x1a*.bigquery_emulator.v1.ListDatasetsResponse\x12h\n" +
"\rRegisterTable\x12*.bigquery_emulator.v1.RegisterTableRequest\x1a+.bigquery_emulator.v1.RegisterTableResponse\x12\\\n" +
"\tDropTable\x12&.bigquery_emulator.v1.DropTableRequest\x1a'.bigquery_emulator.v1.DropTableResponse\x12_\n" +
"\n" +
"ListTables\x12'.bigquery_emulator.v1.ListTablesRequest\x1a(.bigquery_emulator.v1.ListTablesResponse\x12h\n" +
"\rDescribeTable\x12*.bigquery_emulator.v1.DescribeTableRequest\x1a+.bigquery_emulator.v1.DescribeTableResponse\x12_\n" +
"\n" +
"InsertRows\x12'.bigquery_emulator.v1.InsertRowsRequest\x1a(.bigquery_emulator.v1.InsertRowsResponse\x12Y\n" +
"\bListRows\x12%.bigquery_emulator.v1.ListRowsRequest\x1a&.bigquery_emulator.v1.ListRowsResponse\x12e\n" +
"\fListRoutines\x12).bigquery_emulator.v1.ListRoutinesRequest\x1a*.bigquery_emulator.v1.ListRoutinesResponse\x12_\n" +
"\n" +
"GetRoutine\x12'.bigquery_emulator.v1.GetRoutineRequest\x1a(.bigquery_emulator.v1.GetRoutineResponse\x12h\n" +
"\rUpsertRoutine\x12*.bigquery_emulator.v1.UpsertRoutineRequest\x1a+.bigquery_emulator.v1.UpsertRoutineResponse\x12h\n" +
"\rDeleteRoutine\x12*.bigquery_emulator.v1.DeleteRoutineRequest\x1a+.bigquery_emulator.v1.DeleteRoutineResponse\x12\x80\x01\n" +
"\x15UpsertRowAccessPolicy\x122.bigquery_emulator.v1.UpsertRowAccessPolicyRequest\x1a3.bigquery_emulator.v1.UpsertRowAccessPolicyResponse\x12\x80\x01\n" +
"\x15DeleteRowAccessPolicy\x122.bigquery_emulator.v1.DeleteRowAccessPolicyRequest\x1a3.bigquery_emulator.v1.DeleteRowAccessPolicyResponse\x12\x80\x01\n" +
"\x15ListRowAccessPolicies\x122.bigquery_emulator.v1.ListRowAccessPoliciesRequest\x1a3.bigquery_emulator.v1.ListRowAccessPoliciesResponse\x12z\n" +
"\x13SetColumnGovernance\x120.bigquery_emulator.v1.SetColumnGovernanceRequest\x1a1.bigquery_emulator.v1.SetColumnGovernanceResponse2\xb7\x01\n" +
"\x05Query\x12R\n" +
"\x06DryRun\x12\".bigquery_emulator.v1.QueryRequest\x1a$.bigquery_emulator.v1.DryRunResponse\x12Z\n" +
"\fExecuteQuery\x12\".bigquery_emulator.v1.QueryRequest\x1a$.bigquery_emulator.v1.QueryResultRow0\x012\xdd\x03\n" +
"\bSqlTools\x12Y\n" +
"\x06Format\x12&.bigquery_emulator.v1.FormatSqlRequest\x1a'.bigquery_emulator.v1.FormatSqlResponse\x12V\n" +
"\x05Parse\x12%.bigquery_emulator.v1.ParseSqlRequest\x1a&.bigquery_emulator.v1.ParseSqlResponse\x12_\n" +
"\bTokenize\x12(.bigquery_emulator.v1.TokenizeSqlRequest\x1a).bigquery_emulator.v1.TokenizeSqlResponse\x12_\n" +
"\bComplete\x12(.bigquery_emulator.v1.CompleteSqlRequest\x1a).bigquery_emulator.v1.CompleteSqlResponse\x12\\\n" +
"\aAnalyze\x12'.bigquery_emulator.v1.AnalyzeSqlRequest\x1a(.bigquery_emulator.v1.AnalyzeSqlResponseBFZAgithub.com/vantaboard/bigquery-emulator/gateway/enginepb;enginepb\xf8\x01\x01b\x06proto3"
// State used by file_emulator_proto_rawDescGZIP: the sync.Once guards the
// single compression run, and the byte slice holds its result.
var (
file_emulator_proto_rawDescOnce sync.Once
file_emulator_proto_rawDescData []byte
)
// file_emulator_proto_rawDescGZIP returns file_emulator_proto_rawDescData,
// filling it on the first call by passing the bytes of
// file_emulator_proto_rawDesc to protoimpl.X.CompressGZIP. The sync.Once
// ensures the compression runs at most once.
func file_emulator_proto_rawDescGZIP() []byte {
	file_emulator_proto_rawDescOnce.Do(func() {
		// View the string constant's bytes as a []byte without copying.
		raw := unsafe.Slice(unsafe.StringData(file_emulator_proto_rawDesc), len(file_emulator_proto_rawDesc))
		file_emulator_proto_rawDescData = protoimpl.X.CompressGZIP(raw)
	})
	return file_emulator_proto_rawDescData
}
// file_emulator_proto_msgTypes holds 70 protoimpl.MessageInfo slots, one per
// entry (0-69) of file_emulator_proto_goTypes. The generated Reset and
// ProtoReflect methods take the address of the slot for their message index.
var file_emulator_proto_msgTypes = make([]protoimpl.MessageInfo, 70)
// file_emulator_proto_goTypes lists, by index, a typed nil pointer for each Go
// message type generated from emulator.proto; the trailing comment on each
// entry gives its index and full proto name. Entry 69 (the
// QueryRequest.ParametersEntry map entry) is an untyped nil.
var file_emulator_proto_goTypes = []any{
(*DatasetRef)(nil), // 0: bigquery_emulator.v1.DatasetRef
(*TableRef)(nil), // 1: bigquery_emulator.v1.TableRef
(*FieldSchema)(nil), // 2: bigquery_emulator.v1.FieldSchema
(*TableSchema)(nil), // 3: bigquery_emulator.v1.TableSchema
(*RegisterDatasetRequest)(nil), // 4: bigquery_emulator.v1.RegisterDatasetRequest
(*RegisterDatasetResponse)(nil), // 5: bigquery_emulator.v1.RegisterDatasetResponse
(*DropDatasetRequest)(nil), // 6: bigquery_emulator.v1.DropDatasetRequest
(*DropDatasetResponse)(nil), // 7: bigquery_emulator.v1.DropDatasetResponse
(*UndeleteDatasetRequest)(nil), // 8: bigquery_emulator.v1.UndeleteDatasetRequest
(*UndeleteDatasetResponse)(nil), // 9: bigquery_emulator.v1.UndeleteDatasetResponse
(*ListDatasetsRequest)(nil), // 10: bigquery_emulator.v1.ListDatasetsRequest
(*ListDatasetsResponse)(nil), // 11: bigquery_emulator.v1.ListDatasetsResponse
(*RegisterTableRequest)(nil), // 12: bigquery_emulator.v1.RegisterTableRequest
(*RegisterTableResponse)(nil), // 13: bigquery_emulator.v1.RegisterTableResponse
(*DropTableRequest)(nil), // 14: bigquery_emulator.v1.DropTableRequest
(*DropTableResponse)(nil), // 15: bigquery_emulator.v1.DropTableResponse
(*ListTablesRequest)(nil), // 16: bigquery_emulator.v1.ListTablesRequest
(*ListTablesResponse)(nil), // 17: bigquery_emulator.v1.ListTablesResponse
(*DescribeTableRequest)(nil), // 18: bigquery_emulator.v1.DescribeTableRequest
(*DescribeTableResponse)(nil), // 19: bigquery_emulator.v1.DescribeTableResponse
(*DataRow)(nil), // 20: bigquery_emulator.v1.DataRow
(*InsertRowsRequest)(nil), // 21: bigquery_emulator.v1.InsertRowsRequest
(*InsertRowsResponse)(nil), // 22: bigquery_emulator.v1.InsertRowsResponse
(*ListRowsRequest)(nil), // 23: bigquery_emulator.v1.ListRowsRequest
(*RoutineRef)(nil), // 24: bigquery_emulator.v1.RoutineRef
(*RoutineDescriptor)(nil), // 25: bigquery_emulator.v1.RoutineDescriptor
(*ListRoutinesRequest)(nil), // 26: bigquery_emulator.v1.ListRoutinesRequest
(*ListRoutinesResponse)(nil), // 27: bigquery_emulator.v1.ListRoutinesResponse
(*GetRoutineRequest)(nil), // 28: bigquery_emulator.v1.GetRoutineRequest
(*GetRoutineResponse)(nil), // 29: bigquery_emulator.v1.GetRoutineResponse
(*UpsertRoutineRequest)(nil), // 30: bigquery_emulator.v1.UpsertRoutineRequest
(*UpsertRoutineResponse)(nil), // 31: bigquery_emulator.v1.UpsertRoutineResponse
(*DeleteRoutineRequest)(nil), // 32: bigquery_emulator.v1.DeleteRoutineRequest
(*DeleteRoutineResponse)(nil), // 33: bigquery_emulator.v1.DeleteRoutineResponse
(*RowAccessPolicy)(nil), // 34: bigquery_emulator.v1.RowAccessPolicy
(*UpsertRowAccessPolicyRequest)(nil), // 35: bigquery_emulator.v1.UpsertRowAccessPolicyRequest
(*UpsertRowAccessPolicyResponse)(nil), // 36: bigquery_emulator.v1.UpsertRowAccessPolicyResponse
(*DeleteRowAccessPolicyRequest)(nil), // 37: bigquery_emulator.v1.DeleteRowAccessPolicyRequest
(*DeleteRowAccessPolicyResponse)(nil), // 38: bigquery_emulator.v1.DeleteRowAccessPolicyResponse
(*ListRowAccessPoliciesRequest)(nil), // 39: bigquery_emulator.v1.ListRowAccessPoliciesRequest
(*ListRowAccessPoliciesResponse)(nil), // 40: bigquery_emulator.v1.ListRowAccessPoliciesResponse
(*ColumnGovernance)(nil), // 41: bigquery_emulator.v1.ColumnGovernance
(*SetColumnGovernanceRequest)(nil), // 42: bigquery_emulator.v1.SetColumnGovernanceRequest
(*SetColumnGovernanceResponse)(nil), // 43: bigquery_emulator.v1.SetColumnGovernanceResponse
(*ListRowsResponse)(nil), // 44: bigquery_emulator.v1.ListRowsResponse
(*QueryRequest)(nil), // 45: bigquery_emulator.v1.QueryRequest
(*QueryParameter)(nil), // 46: bigquery_emulator.v1.QueryParameter
(*DryRunResponse)(nil), // 47: bigquery_emulator.v1.DryRunResponse
(*QueryResultRow)(nil), // 48: bigquery_emulator.v1.QueryResultRow
(*PhaseTiming)(nil), // 49: bigquery_emulator.v1.PhaseTiming
(*PhaseTimings)(nil), // 50: bigquery_emulator.v1.PhaseTimings
(*DmlStats)(nil), // 51: bigquery_emulator.v1.DmlStats
(*Cell)(nil), // 52: bigquery_emulator.v1.Cell
(*Array)(nil), // 53: bigquery_emulator.v1.Array
(*Struct)(nil), // 54: bigquery_emulator.v1.Struct
(*SqlDiagnostic)(nil), // 55: bigquery_emulator.v1.SqlDiagnostic
(*FormatSqlRequest)(nil), // 56: bigquery_emulator.v1.FormatSqlRequest
(*FormatSqlResponse)(nil), // 57: bigquery_emulator.v1.FormatSqlResponse
(*ParseSqlRequest)(nil), // 58: bigquery_emulator.v1.ParseSqlRequest
(*ParseSqlResponse)(nil), // 59: bigquery_emulator.v1.ParseSqlResponse
(*TokenizeSqlRequest)(nil), // 60: bigquery_emulator.v1.TokenizeSqlRequest
(*SqlToken)(nil), // 61: bigquery_emulator.v1.SqlToken
(*TokenizeSqlResponse)(nil), // 62: bigquery_emulator.v1.TokenizeSqlResponse
(*CompleteSqlRequest)(nil), // 63: bigquery_emulator.v1.CompleteSqlRequest
(*SqlCompletionCandidate)(nil), // 64: bigquery_emulator.v1.SqlCompletionCandidate
(*CompleteSqlResponse)(nil), // 65: bigquery_emulator.v1.CompleteSqlResponse
(*AnalyzeSqlRequest)(nil), // 66: bigquery_emulator.v1.AnalyzeSqlRequest
(*ReferencedTable)(nil), // 67: bigquery_emulator.v1.ReferencedTable
(*AnalyzeSqlResponse)(nil), // 68: bigquery_emulator.v1.AnalyzeSqlResponse
nil, // 69: bigquery_emulator.v1.QueryRequest.ParametersEntry
}
// file_emulator_proto_depIdxs is the flat dependency table handed to
// protoimpl.TypeBuilder as DependencyIndexes in file_emulator_proto_init.
// Each leading entry is an index into file_emulator_proto_goTypes; the
// trailing comment on every line names the field or method the entry
// belongs to and the type it resolves to. The final five entries are the
// start offsets of the sub-lists, as annotated on each of those lines.
var file_emulator_proto_depIdxs = []int32{
2, // 0: bigquery_emulator.v1.FieldSchema.fields:type_name -> bigquery_emulator.v1.FieldSchema
2, // 1: bigquery_emulator.v1.TableSchema.fields:type_name -> bigquery_emulator.v1.FieldSchema
0, // 2: bigquery_emulator.v1.RegisterDatasetRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
0, // 3: bigquery_emulator.v1.DropDatasetRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
0, // 4: bigquery_emulator.v1.UndeleteDatasetRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
0, // 5: bigquery_emulator.v1.ListDatasetsResponse.datasets:type_name -> bigquery_emulator.v1.DatasetRef
1, // 6: bigquery_emulator.v1.RegisterTableRequest.table:type_name -> bigquery_emulator.v1.TableRef
3, // 7: bigquery_emulator.v1.RegisterTableRequest.schema:type_name -> bigquery_emulator.v1.TableSchema
1, // 8: bigquery_emulator.v1.DropTableRequest.table:type_name -> bigquery_emulator.v1.TableRef
0, // 9: bigquery_emulator.v1.ListTablesRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
1, // 10: bigquery_emulator.v1.ListTablesResponse.tables:type_name -> bigquery_emulator.v1.TableRef
1, // 11: bigquery_emulator.v1.DescribeTableRequest.table:type_name -> bigquery_emulator.v1.TableRef
3, // 12: bigquery_emulator.v1.DescribeTableResponse.schema:type_name -> bigquery_emulator.v1.TableSchema
52, // 13: bigquery_emulator.v1.DataRow.cells:type_name -> bigquery_emulator.v1.Cell
1, // 14: bigquery_emulator.v1.InsertRowsRequest.table:type_name -> bigquery_emulator.v1.TableRef
20, // 15: bigquery_emulator.v1.InsertRowsRequest.rows:type_name -> bigquery_emulator.v1.DataRow
1, // 16: bigquery_emulator.v1.ListRowsRequest.table:type_name -> bigquery_emulator.v1.TableRef
24, // 17: bigquery_emulator.v1.RoutineDescriptor.routine:type_name -> bigquery_emulator.v1.RoutineRef
0, // 18: bigquery_emulator.v1.ListRoutinesRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
25, // 19: bigquery_emulator.v1.ListRoutinesResponse.routines:type_name -> bigquery_emulator.v1.RoutineDescriptor
24, // 20: bigquery_emulator.v1.GetRoutineRequest.routine:type_name -> bigquery_emulator.v1.RoutineRef
25, // 21: bigquery_emulator.v1.GetRoutineResponse.routine:type_name -> bigquery_emulator.v1.RoutineDescriptor
25, // 22: bigquery_emulator.v1.UpsertRoutineRequest.routine:type_name -> bigquery_emulator.v1.RoutineDescriptor
24, // 23: bigquery_emulator.v1.DeleteRoutineRequest.routine:type_name -> bigquery_emulator.v1.RoutineRef
1, // 24: bigquery_emulator.v1.RowAccessPolicy.table:type_name -> bigquery_emulator.v1.TableRef
34, // 25: bigquery_emulator.v1.UpsertRowAccessPolicyRequest.policy:type_name -> bigquery_emulator.v1.RowAccessPolicy
34, // 26: bigquery_emulator.v1.UpsertRowAccessPolicyResponse.policy:type_name -> bigquery_emulator.v1.RowAccessPolicy
1, // 27: bigquery_emulator.v1.DeleteRowAccessPolicyRequest.table:type_name -> bigquery_emulator.v1.TableRef
1, // 28: bigquery_emulator.v1.ListRowAccessPoliciesRequest.table:type_name -> bigquery_emulator.v1.TableRef
34, // 29: bigquery_emulator.v1.ListRowAccessPoliciesResponse.policies:type_name -> bigquery_emulator.v1.RowAccessPolicy
1, // 30: bigquery_emulator.v1.SetColumnGovernanceRequest.table:type_name -> bigquery_emulator.v1.TableRef
41, // 31: bigquery_emulator.v1.SetColumnGovernanceRequest.column:type_name -> bigquery_emulator.v1.ColumnGovernance
20, // 32: bigquery_emulator.v1.ListRowsResponse.rows:type_name -> bigquery_emulator.v1.DataRow
69, // 33: bigquery_emulator.v1.QueryRequest.parameters:type_name -> bigquery_emulator.v1.QueryRequest.ParametersEntry
3, // 34: bigquery_emulator.v1.DryRunResponse.schema:type_name -> bigquery_emulator.v1.TableSchema
3, // 35: bigquery_emulator.v1.QueryResultRow.schema:type_name -> bigquery_emulator.v1.TableSchema
52, // 36: bigquery_emulator.v1.QueryResultRow.cells:type_name -> bigquery_emulator.v1.Cell
51, // 37: bigquery_emulator.v1.QueryResultRow.dml_stats:type_name -> bigquery_emulator.v1.DmlStats
50, // 38: bigquery_emulator.v1.QueryResultRow.phase_timings:type_name -> bigquery_emulator.v1.PhaseTimings
49, // 39: bigquery_emulator.v1.PhaseTimings.phases:type_name -> bigquery_emulator.v1.PhaseTiming
53, // 40: bigquery_emulator.v1.Cell.array:type_name -> bigquery_emulator.v1.Array
54, // 41: bigquery_emulator.v1.Cell.struct_value:type_name -> bigquery_emulator.v1.Struct
52, // 42: bigquery_emulator.v1.Array.elements:type_name -> bigquery_emulator.v1.Cell
52, // 43: bigquery_emulator.v1.Struct.fields:type_name -> bigquery_emulator.v1.Cell
55, // 44: bigquery_emulator.v1.FormatSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
55, // 45: bigquery_emulator.v1.ParseSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
61, // 46: bigquery_emulator.v1.TokenizeSqlResponse.tokens:type_name -> bigquery_emulator.v1.SqlToken
55, // 47: bigquery_emulator.v1.TokenizeSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
64, // 48: bigquery_emulator.v1.CompleteSqlResponse.candidates:type_name -> bigquery_emulator.v1.SqlCompletionCandidate
67, // 49: bigquery_emulator.v1.AnalyzeSqlResponse.referenced_tables:type_name -> bigquery_emulator.v1.ReferencedTable
55, // 50: bigquery_emulator.v1.AnalyzeSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
46, // 51: bigquery_emulator.v1.QueryRequest.ParametersEntry.value:type_name -> bigquery_emulator.v1.QueryParameter
4, // 52: bigquery_emulator.v1.Catalog.RegisterDataset:input_type -> bigquery_emulator.v1.RegisterDatasetRequest
6, // 53: bigquery_emulator.v1.Catalog.DropDataset:input_type -> bigquery_emulator.v1.DropDatasetRequest
8, // 54: bigquery_emulator.v1.Catalog.UndeleteDataset:input_type -> bigquery_emulator.v1.UndeleteDatasetRequest
10, // 55: bigquery_emulator.v1.Catalog.ListDatasets:input_type -> bigquery_emulator.v1.ListDatasetsRequest
12, // 56: bigquery_emulator.v1.Catalog.RegisterTable:input_type -> bigquery_emulator.v1.RegisterTableRequest
14, // 57: bigquery_emulator.v1.Catalog.DropTable:input_type -> bigquery_emulator.v1.DropTableRequest
16, // 58: bigquery_emulator.v1.Catalog.ListTables:input_type -> bigquery_emulator.v1.ListTablesRequest
18, // 59: bigquery_emulator.v1.Catalog.DescribeTable:input_type -> bigquery_emulator.v1.DescribeTableRequest
21, // 60: bigquery_emulator.v1.Catalog.InsertRows:input_type -> bigquery_emulator.v1.InsertRowsRequest
23, // 61: bigquery_emulator.v1.Catalog.ListRows:input_type -> bigquery_emulator.v1.ListRowsRequest
26, // 62: bigquery_emulator.v1.Catalog.ListRoutines:input_type -> bigquery_emulator.v1.ListRoutinesRequest
28, // 63: bigquery_emulator.v1.Catalog.GetRoutine:input_type -> bigquery_emulator.v1.GetRoutineRequest
30, // 64: bigquery_emulator.v1.Catalog.UpsertRoutine:input_type -> bigquery_emulator.v1.UpsertRoutineRequest
32, // 65: bigquery_emulator.v1.Catalog.DeleteRoutine:input_type -> bigquery_emulator.v1.DeleteRoutineRequest
35, // 66: bigquery_emulator.v1.Catalog.UpsertRowAccessPolicy:input_type -> bigquery_emulator.v1.UpsertRowAccessPolicyRequest
37, // 67: bigquery_emulator.v1.Catalog.DeleteRowAccessPolicy:input_type -> bigquery_emulator.v1.DeleteRowAccessPolicyRequest
39, // 68: bigquery_emulator.v1.Catalog.ListRowAccessPolicies:input_type -> bigquery_emulator.v1.ListRowAccessPoliciesRequest
42, // 69: bigquery_emulator.v1.Catalog.SetColumnGovernance:input_type -> bigquery_emulator.v1.SetColumnGovernanceRequest
45, // 70: bigquery_emulator.v1.Query.DryRun:input_type -> bigquery_emulator.v1.QueryRequest
45, // 71: bigquery_emulator.v1.Query.ExecuteQuery:input_type -> bigquery_emulator.v1.QueryRequest
56, // 72: bigquery_emulator.v1.SqlTools.Format:input_type -> bigquery_emulator.v1.FormatSqlRequest
58, // 73: bigquery_emulator.v1.SqlTools.Parse:input_type -> bigquery_emulator.v1.ParseSqlRequest
60, // 74: bigquery_emulator.v1.SqlTools.Tokenize:input_type -> bigquery_emulator.v1.TokenizeSqlRequest
63, // 75: bigquery_emulator.v1.SqlTools.Complete:input_type -> bigquery_emulator.v1.CompleteSqlRequest
66, // 76: bigquery_emulator.v1.SqlTools.Analyze:input_type -> bigquery_emulator.v1.AnalyzeSqlRequest
5, // 77: bigquery_emulator.v1.Catalog.RegisterDataset:output_type -> bigquery_emulator.v1.RegisterDatasetResponse
7, // 78: bigquery_emulator.v1.Catalog.DropDataset:output_type -> bigquery_emulator.v1.DropDatasetResponse
9, // 79: bigquery_emulator.v1.Catalog.UndeleteDataset:output_type -> bigquery_emulator.v1.UndeleteDatasetResponse
11, // 80: bigquery_emulator.v1.Catalog.ListDatasets:output_type -> bigquery_emulator.v1.ListDatasetsResponse
13, // 81: bigquery_emulator.v1.Catalog.RegisterTable:output_type -> bigquery_emulator.v1.RegisterTableResponse
15, // 82: bigquery_emulator.v1.Catalog.DropTable:output_type -> bigquery_emulator.v1.DropTableResponse
17, // 83: bigquery_emulator.v1.Catalog.ListTables:output_type -> bigquery_emulator.v1.ListTablesResponse
19, // 84: bigquery_emulator.v1.Catalog.DescribeTable:output_type -> bigquery_emulator.v1.DescribeTableResponse
22, // 85: bigquery_emulator.v1.Catalog.InsertRows:output_type -> bigquery_emulator.v1.InsertRowsResponse
44, // 86: bigquery_emulator.v1.Catalog.ListRows:output_type -> bigquery_emulator.v1.ListRowsResponse
27, // 87: bigquery_emulator.v1.Catalog.ListRoutines:output_type -> bigquery_emulator.v1.ListRoutinesResponse
29, // 88: bigquery_emulator.v1.Catalog.GetRoutine:output_type -> bigquery_emulator.v1.GetRoutineResponse
31, // 89: bigquery_emulator.v1.Catalog.UpsertRoutine:output_type -> bigquery_emulator.v1.UpsertRoutineResponse
33, // 90: bigquery_emulator.v1.Catalog.DeleteRoutine:output_type -> bigquery_emulator.v1.DeleteRoutineResponse
36, // 91: bigquery_emulator.v1.Catalog.UpsertRowAccessPolicy:output_type -> bigquery_emulator.v1.UpsertRowAccessPolicyResponse
38, // 92: bigquery_emulator.v1.Catalog.DeleteRowAccessPolicy:output_type -> bigquery_emulator.v1.DeleteRowAccessPolicyResponse
40, // 93: bigquery_emulator.v1.Catalog.ListRowAccessPolicies:output_type -> bigquery_emulator.v1.ListRowAccessPoliciesResponse
43, // 94: bigquery_emulator.v1.Catalog.SetColumnGovernance:output_type -> bigquery_emulator.v1.SetColumnGovernanceResponse
47, // 95: bigquery_emulator.v1.Query.DryRun:output_type -> bigquery_emulator.v1.DryRunResponse
48, // 96: bigquery_emulator.v1.Query.ExecuteQuery:output_type -> bigquery_emulator.v1.QueryResultRow
57, // 97: bigquery_emulator.v1.SqlTools.Format:output_type -> bigquery_emulator.v1.FormatSqlResponse
59, // 98: bigquery_emulator.v1.SqlTools.Parse:output_type -> bigquery_emulator.v1.ParseSqlResponse
62, // 99: bigquery_emulator.v1.SqlTools.Tokenize:output_type -> bigquery_emulator.v1.TokenizeSqlResponse
65, // 100: bigquery_emulator.v1.SqlTools.Complete:output_type -> bigquery_emulator.v1.CompleteSqlResponse
68, // 101: bigquery_emulator.v1.SqlTools.Analyze:output_type -> bigquery_emulator.v1.AnalyzeSqlResponse
77, // [77:102] is the sub-list for method output_type
52, // [52:77] is the sub-list for method input_type
52, // [52:52] is the sub-list for extension type_name
52, // [52:52] is the sub-list for extension extendee
0, // [0:52] is the sub-list for field type_name
}
// init triggers the one-time construction of the emulator.proto file
// descriptor when the package is loaded.
func init() {
	file_emulator_proto_init()
}
// file_emulator_proto_init builds the descriptor and Go type information
// for emulator.proto and stores the resulting file descriptor in
// File_emulator_proto. It returns immediately when File_emulator_proto is
// already set, so calls after the first do nothing.
func file_emulator_proto_init() {
if File_emulator_proto != nil {
return
}
// Index 52 corresponds to bigquery_emulator.v1.Cell in the goTypes
// listing; these are the wrapper types registered for its oneof.
file_emulator_proto_msgTypes[52].OneofWrappers = []any{
(*Cell_StringValue)(nil),
(*Cell_NullValue)(nil),
(*Cell_Array)(nil),
(*Cell_StructValue)(nil),
}
// x is a throwaway local type; reflecting on it yields this package's
// import path for GoPackagePath below.
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
// View the raw descriptor string as a byte slice without copying it.
RawDescriptor: unsafe.Slice(unsafe.StringData(file_emulator_proto_rawDesc), len(file_emulator_proto_rawDesc)),
NumEnums: 0,
NumMessages: 70,
NumExtensions: 0,
NumServices: 3,
},
GoTypes: file_emulator_proto_goTypes,
DependencyIndexes: file_emulator_proto_depIdxs,
MessageInfos: file_emulator_proto_msgTypes,
}.Build()
File_emulator_proto = out.File
// The lookup tables are cleared once Build has returned.
file_emulator_proto_goTypes = nil
file_emulator_proto_depIdxs = nil
}
// emulator.proto is the internal contract between the Go REST gateway and
// the C++ engine. It is intentionally minimal: the gateway owns the
// public-facing BigQuery REST shape, and only forwards the bits that
// actually need GoogleSQL to do their job.
//
// Code generation is wired up via buf (see ../buf.gen.yaml) for the Go
// side and via Bazel (see ./BUILD.bazel) for the C++ side. The Go
// stubs land in gateway/enginepb/ and are checked in so `go build`
// works without an extra codegen step; the C++ stubs are generated
// fresh into the Bazel output tree.
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.2
// - protoc v7.35.0
// source: emulator.proto
package enginepb
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against:
// the build fails if that package does not declare
// SupportPackageIsVersion9.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
// Full gRPC method names for the Catalog service, one per RPC, in the
// form "/<package>.<Service>/<Method>". The client stubs in this file pass
// them to Invoke and the server handlers report them in
// grpc.UnaryServerInfo.FullMethod.
const (
Catalog_RegisterDataset_FullMethodName = "/bigquery_emulator.v1.Catalog/RegisterDataset"
Catalog_DropDataset_FullMethodName = "/bigquery_emulator.v1.Catalog/DropDataset"
Catalog_UndeleteDataset_FullMethodName = "/bigquery_emulator.v1.Catalog/UndeleteDataset"
Catalog_ListDatasets_FullMethodName = "/bigquery_emulator.v1.Catalog/ListDatasets"
Catalog_RegisterTable_FullMethodName = "/bigquery_emulator.v1.Catalog/RegisterTable"
Catalog_DropTable_FullMethodName = "/bigquery_emulator.v1.Catalog/DropTable"
Catalog_ListTables_FullMethodName = "/bigquery_emulator.v1.Catalog/ListTables"
Catalog_DescribeTable_FullMethodName = "/bigquery_emulator.v1.Catalog/DescribeTable"
Catalog_InsertRows_FullMethodName = "/bigquery_emulator.v1.Catalog/InsertRows"
Catalog_ListRows_FullMethodName = "/bigquery_emulator.v1.Catalog/ListRows"
Catalog_ListRoutines_FullMethodName = "/bigquery_emulator.v1.Catalog/ListRoutines"
Catalog_GetRoutine_FullMethodName = "/bigquery_emulator.v1.Catalog/GetRoutine"
Catalog_UpsertRoutine_FullMethodName = "/bigquery_emulator.v1.Catalog/UpsertRoutine"
Catalog_DeleteRoutine_FullMethodName = "/bigquery_emulator.v1.Catalog/DeleteRoutine"
Catalog_UpsertRowAccessPolicy_FullMethodName = "/bigquery_emulator.v1.Catalog/UpsertRowAccessPolicy"
Catalog_DeleteRowAccessPolicy_FullMethodName = "/bigquery_emulator.v1.Catalog/DeleteRowAccessPolicy"
Catalog_ListRowAccessPolicies_FullMethodName = "/bigquery_emulator.v1.Catalog/ListRowAccessPolicies"
Catalog_SetColumnGovernance_FullMethodName = "/bigquery_emulator.v1.Catalog/SetColumnGovernance"
)
// CatalogClient is the client API for Catalog service.
//
// Every method is a unary RPC: it takes a context, one request message and
// optional per-call grpc.CallOption values, and returns one response
// message or an error. NewCatalogClient returns the implementation defined
// in this file.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type CatalogClient interface {
// Dataset lifecycle RPCs: register, drop and undelete a dataset.
RegisterDataset(ctx context.Context, in *RegisterDatasetRequest, opts ...grpc.CallOption) (*RegisterDatasetResponse, error)
DropDataset(ctx context.Context, in *DropDatasetRequest, opts ...grpc.CallOption) (*DropDatasetResponse, error)
UndeleteDataset(ctx context.Context, in *UndeleteDatasetRequest, opts ...grpc.CallOption) (*UndeleteDatasetResponse, error)
// Lists every dataset registered under `project_id`. The gateway's
// `datasets.list` REST handler delegates here; the response is
// shaped as a single page (no continuation cursor today, the
// emulator never has enough datasets to need one).
ListDatasets(ctx context.Context, in *ListDatasetsRequest, opts ...grpc.CallOption) (*ListDatasetsResponse, error)
// Table lifecycle RPCs: register and drop a table.
RegisterTable(ctx context.Context, in *RegisterTableRequest, opts ...grpc.CallOption) (*RegisterTableResponse, error)
DropTable(ctx context.Context, in *DropTableRequest, opts ...grpc.CallOption) (*DropTableResponse, error)
// Lists every table registered under `dataset`. The gateway's
// `tables.list` REST handler delegates here; pagination contract
// matches ListDatasets.
ListTables(ctx context.Context, in *ListTablesRequest, opts ...grpc.CallOption) (*ListTablesResponse, error)
// DescribeTable takes a DescribeTableRequest and returns a
// DescribeTableResponse.
DescribeTable(ctx context.Context, in *DescribeTableRequest, opts ...grpc.CallOption) (*DescribeTableResponse, error)
// Row-level access. `InsertRows` delegates to `Storage::AppendRows`
// and is the engine side of `tabledata.insertAll`. `ListRows` is
// the engine side of `tabledata.list` and returns a single
// (possibly empty) page of rows plus the total row count so the
// gateway can synthesize BigQuery's pageToken semantics.
InsertRows(ctx context.Context, in *InsertRowsRequest, opts ...grpc.CallOption) (*InsertRowsResponse, error)
ListRows(ctx context.Context, in *ListRowsRequest, opts ...grpc.CallOption) (*ListRowsResponse, error)
// Routine metadata persisted in `catalog.duckdb` for REST
// round-trip and cross-restart SQL UDF / TVF / procedure replay.
ListRoutines(ctx context.Context, in *ListRoutinesRequest, opts ...grpc.CallOption) (*ListRoutinesResponse, error)
GetRoutine(ctx context.Context, in *GetRoutineRequest, opts ...grpc.CallOption) (*GetRoutineResponse, error)
UpsertRoutine(ctx context.Context, in *UpsertRoutineRequest, opts ...grpc.CallOption) (*UpsertRoutineResponse, error)
DeleteRoutine(ctx context.Context, in *DeleteRoutineRequest, opts ...grpc.CallOption) (*DeleteRoutineResponse, error)
// Row-access policies and column-level security metadata.
UpsertRowAccessPolicy(ctx context.Context, in *UpsertRowAccessPolicyRequest, opts ...grpc.CallOption) (*UpsertRowAccessPolicyResponse, error)
DeleteRowAccessPolicy(ctx context.Context, in *DeleteRowAccessPolicyRequest, opts ...grpc.CallOption) (*DeleteRowAccessPolicyResponse, error)
ListRowAccessPolicies(ctx context.Context, in *ListRowAccessPoliciesRequest, opts ...grpc.CallOption) (*ListRowAccessPoliciesResponse, error)
SetColumnGovernance(ctx context.Context, in *SetColumnGovernanceRequest, opts ...grpc.CallOption) (*SetColumnGovernanceResponse, error)
}
// catalogClient is the CatalogClient implementation returned by
// NewCatalogClient. Each of its methods issues its RPC through cc.
type catalogClient struct {
// cc is the connection every RPC is invoked on.
cc grpc.ClientConnInterface
}
// NewCatalogClient returns a CatalogClient whose RPCs are sent over cc.
func NewCatalogClient(cc grpc.ClientConnInterface) CatalogClient {
	client := &catalogClient{cc: cc}
	return client
}
// RegisterDataset performs the unary Catalog/RegisterDataset RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) RegisterDataset(ctx context.Context, in *RegisterDatasetRequest, opts ...grpc.CallOption) (*RegisterDatasetResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &RegisterDatasetResponse{}
	if err := c.cc.Invoke(ctx, Catalog_RegisterDataset_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// DropDataset performs the unary Catalog/DropDataset RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) DropDataset(ctx context.Context, in *DropDatasetRequest, opts ...grpc.CallOption) (*DropDatasetResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &DropDatasetResponse{}
	if err := c.cc.Invoke(ctx, Catalog_DropDataset_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// UndeleteDataset performs the unary Catalog/UndeleteDataset RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) UndeleteDataset(ctx context.Context, in *UndeleteDatasetRequest, opts ...grpc.CallOption) (*UndeleteDatasetResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &UndeleteDatasetResponse{}
	if err := c.cc.Invoke(ctx, Catalog_UndeleteDataset_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// ListDatasets performs the unary Catalog/ListDatasets RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) ListDatasets(ctx context.Context, in *ListDatasetsRequest, opts ...grpc.CallOption) (*ListDatasetsResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &ListDatasetsResponse{}
	if err := c.cc.Invoke(ctx, Catalog_ListDatasets_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// RegisterTable performs the unary Catalog/RegisterTable RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) RegisterTable(ctx context.Context, in *RegisterTableRequest, opts ...grpc.CallOption) (*RegisterTableResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &RegisterTableResponse{}
	if err := c.cc.Invoke(ctx, Catalog_RegisterTable_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// DropTable performs the unary Catalog/DropTable RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) DropTable(ctx context.Context, in *DropTableRequest, opts ...grpc.CallOption) (*DropTableResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &DropTableResponse{}
	if err := c.cc.Invoke(ctx, Catalog_DropTable_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// ListTables performs the unary Catalog/ListTables RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) ListTables(ctx context.Context, in *ListTablesRequest, opts ...grpc.CallOption) (*ListTablesResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &ListTablesResponse{}
	if err := c.cc.Invoke(ctx, Catalog_ListTables_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// DescribeTable performs the unary Catalog/DescribeTable RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) DescribeTable(ctx context.Context, in *DescribeTableRequest, opts ...grpc.CallOption) (*DescribeTableResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &DescribeTableResponse{}
	if err := c.cc.Invoke(ctx, Catalog_DescribeTable_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// InsertRows performs the unary Catalog/InsertRows RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) InsertRows(ctx context.Context, in *InsertRowsRequest, opts ...grpc.CallOption) (*InsertRowsResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &InsertRowsResponse{}
	if err := c.cc.Invoke(ctx, Catalog_InsertRows_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// ListRows performs the unary Catalog/ListRows RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) ListRows(ctx context.Context, in *ListRowsRequest, opts ...grpc.CallOption) (*ListRowsResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &ListRowsResponse{}
	if err := c.cc.Invoke(ctx, Catalog_ListRows_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// ListRoutines performs the unary Catalog/ListRoutines RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) ListRoutines(ctx context.Context, in *ListRoutinesRequest, opts ...grpc.CallOption) (*ListRoutinesResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &ListRoutinesResponse{}
	if err := c.cc.Invoke(ctx, Catalog_ListRoutines_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// GetRoutine performs the unary Catalog/GetRoutine RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) GetRoutine(ctx context.Context, in *GetRoutineRequest, opts ...grpc.CallOption) (*GetRoutineResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &GetRoutineResponse{}
	if err := c.cc.Invoke(ctx, Catalog_GetRoutine_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// UpsertRoutine performs the unary Catalog/UpsertRoutine RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) UpsertRoutine(ctx context.Context, in *UpsertRoutineRequest, opts ...grpc.CallOption) (*UpsertRoutineResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &UpsertRoutineResponse{}
	if err := c.cc.Invoke(ctx, Catalog_UpsertRoutine_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// DeleteRoutine performs the unary Catalog/DeleteRoutine RPC over c.cc.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. On
// failure the Invoke error is returned with a nil response.
func (c *catalogClient) DeleteRoutine(ctx context.Context, in *DeleteRoutineRequest, opts ...grpc.CallOption) (*DeleteRoutineResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &DeleteRoutineResponse{}
	if err := c.cc.Invoke(ctx, Catalog_DeleteRoutine_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// UpsertRowAccessPolicy performs the unary Catalog/UpsertRowAccessPolicy RPC
// over c.cc. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. On failure the Invoke error is returned with a nil response.
func (c *catalogClient) UpsertRowAccessPolicy(ctx context.Context, in *UpsertRowAccessPolicyRequest, opts ...grpc.CallOption) (*UpsertRowAccessPolicyResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &UpsertRowAccessPolicyResponse{}
	if err := c.cc.Invoke(ctx, Catalog_UpsertRowAccessPolicy_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// DeleteRowAccessPolicy performs the unary Catalog/DeleteRowAccessPolicy RPC
// over c.cc. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. On failure the Invoke error is returned with a nil response.
func (c *catalogClient) DeleteRowAccessPolicy(ctx context.Context, in *DeleteRowAccessPolicyRequest, opts ...grpc.CallOption) (*DeleteRowAccessPolicyResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &DeleteRowAccessPolicyResponse{}
	if err := c.cc.Invoke(ctx, Catalog_DeleteRowAccessPolicy_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// ListRowAccessPolicies performs the unary Catalog/ListRowAccessPolicies RPC
// over c.cc. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. On failure the Invoke error is returned with a nil response.
func (c *catalogClient) ListRowAccessPolicies(ctx context.Context, in *ListRowAccessPoliciesRequest, opts ...grpc.CallOption) (*ListRowAccessPoliciesResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &ListRowAccessPoliciesResponse{}
	if err := c.cc.Invoke(ctx, Catalog_ListRowAccessPolicies_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// SetColumnGovernance performs the unary Catalog/SetColumnGovernance RPC
// over c.cc. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. On failure the Invoke error is returned with a nil response.
func (c *catalogClient) SetColumnGovernance(ctx context.Context, in *SetColumnGovernanceRequest, opts ...grpc.CallOption) (*SetColumnGovernanceResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := &SetColumnGovernanceResponse{}
	if err := c.cc.Invoke(ctx, Catalog_SetColumnGovernance_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// CatalogServer is the server API for Catalog service.
// All implementations should embed UnimplementedCatalogServer
// for forward compatibility.
//
// Every method is a unary RPC handler: it receives a context and one
// request message and returns one response message or an error.
type CatalogServer interface {
// Dataset lifecycle RPCs: register, drop and undelete a dataset.
RegisterDataset(context.Context, *RegisterDatasetRequest) (*RegisterDatasetResponse, error)
DropDataset(context.Context, *DropDatasetRequest) (*DropDatasetResponse, error)
UndeleteDataset(context.Context, *UndeleteDatasetRequest) (*UndeleteDatasetResponse, error)
// Lists every dataset registered under `project_id`. The gateway's
// `datasets.list` REST handler delegates here; the response is
// shaped as a single page (no continuation cursor today, the
// emulator never has enough datasets to need one).
ListDatasets(context.Context, *ListDatasetsRequest) (*ListDatasetsResponse, error)
// Table lifecycle RPCs: register and drop a table.
RegisterTable(context.Context, *RegisterTableRequest) (*RegisterTableResponse, error)
DropTable(context.Context, *DropTableRequest) (*DropTableResponse, error)
// Lists every table registered under `dataset`. The gateway's
// `tables.list` REST handler delegates here; pagination contract
// matches ListDatasets.
ListTables(context.Context, *ListTablesRequest) (*ListTablesResponse, error)
// DescribeTable takes a DescribeTableRequest and returns a
// DescribeTableResponse.
DescribeTable(context.Context, *DescribeTableRequest) (*DescribeTableResponse, error)
// Row-level access. `InsertRows` delegates to `Storage::AppendRows`
// and is the engine side of `tabledata.insertAll`. `ListRows` is
// the engine side of `tabledata.list` and returns a single
// (possibly empty) page of rows plus the total row count so the
// gateway can synthesize BigQuery's pageToken semantics.
InsertRows(context.Context, *InsertRowsRequest) (*InsertRowsResponse, error)
ListRows(context.Context, *ListRowsRequest) (*ListRowsResponse, error)
// Routine metadata persisted in `catalog.duckdb` for REST
// round-trip and cross-restart SQL UDF / TVF / procedure replay.
ListRoutines(context.Context, *ListRoutinesRequest) (*ListRoutinesResponse, error)
GetRoutine(context.Context, *GetRoutineRequest) (*GetRoutineResponse, error)
UpsertRoutine(context.Context, *UpsertRoutineRequest) (*UpsertRoutineResponse, error)
DeleteRoutine(context.Context, *DeleteRoutineRequest) (*DeleteRoutineResponse, error)
// Row-access policies and column-level security metadata.
UpsertRowAccessPolicy(context.Context, *UpsertRowAccessPolicyRequest) (*UpsertRowAccessPolicyResponse, error)
DeleteRowAccessPolicy(context.Context, *DeleteRowAccessPolicyRequest) (*DeleteRowAccessPolicyResponse, error)
ListRowAccessPolicies(context.Context, *ListRowAccessPoliciesRequest) (*ListRowAccessPoliciesResponse, error)
SetColumnGovernance(context.Context, *SetColumnGovernanceRequest) (*SetColumnGovernanceResponse, error)
}
// UnimplementedCatalogServer should be embedded to have
// forward compatible implementations.
//
// Each CatalogServer method defined on this type returns a nil response
// and a codes.Unimplemented status error naming the method.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedCatalogServer struct{}
// RegisterDataset is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) RegisterDataset(context.Context, *RegisterDatasetRequest) (*RegisterDatasetResponse, error) {
	const msg = "method RegisterDataset not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// DropDataset is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) DropDataset(context.Context, *DropDatasetRequest) (*DropDatasetResponse, error) {
	const msg = "method DropDataset not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// UndeleteDataset is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) UndeleteDataset(context.Context, *UndeleteDatasetRequest) (*UndeleteDatasetResponse, error) {
	const msg = "method UndeleteDataset not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// ListDatasets is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) ListDatasets(context.Context, *ListDatasetsRequest) (*ListDatasetsResponse, error) {
	const msg = "method ListDatasets not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// RegisterTable is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) RegisterTable(context.Context, *RegisterTableRequest) (*RegisterTableResponse, error) {
	const msg = "method RegisterTable not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// DropTable is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) DropTable(context.Context, *DropTableRequest) (*DropTableResponse, error) {
	const msg = "method DropTable not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// ListTables is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) ListTables(context.Context, *ListTablesRequest) (*ListTablesResponse, error) {
	const msg = "method ListTables not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// DescribeTable is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) DescribeTable(context.Context, *DescribeTableRequest) (*DescribeTableResponse, error) {
	const msg = "method DescribeTable not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// InsertRows is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) InsertRows(context.Context, *InsertRowsRequest) (*InsertRowsResponse, error) {
	const msg = "method InsertRows not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// ListRows is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) ListRows(context.Context, *ListRowsRequest) (*ListRowsResponse, error) {
	const msg = "method ListRows not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// ListRoutines is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) ListRoutines(context.Context, *ListRoutinesRequest) (*ListRoutinesResponse, error) {
	const msg = "method ListRoutines not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// GetRoutine is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) GetRoutine(context.Context, *GetRoutineRequest) (*GetRoutineResponse, error) {
	const msg = "method GetRoutine not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// UpsertRoutine is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) UpsertRoutine(context.Context, *UpsertRoutineRequest) (*UpsertRoutineResponse, error) {
	const msg = "method UpsertRoutine not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// DeleteRoutine is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) DeleteRoutine(context.Context, *DeleteRoutineRequest) (*DeleteRoutineResponse, error) {
	const msg = "method DeleteRoutine not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// UpsertRowAccessPolicy is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) UpsertRowAccessPolicy(context.Context, *UpsertRowAccessPolicyRequest) (*UpsertRowAccessPolicyResponse, error) {
	const msg = "method UpsertRowAccessPolicy not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// DeleteRowAccessPolicy is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) DeleteRowAccessPolicy(context.Context, *DeleteRowAccessPolicyRequest) (*DeleteRowAccessPolicyResponse, error) {
	const msg = "method DeleteRowAccessPolicy not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// ListRowAccessPolicies is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) ListRowAccessPolicies(context.Context, *ListRowAccessPoliciesRequest) (*ListRowAccessPoliciesResponse, error) {
	const msg = "method ListRowAccessPolicies not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// SetColumnGovernance is the fallback stub; it always fails with codes.Unimplemented.
func (UnimplementedCatalogServer) SetColumnGovernance(context.Context, *SetColumnGovernanceRequest) (*SetColumnGovernanceResponse, error) {
	const msg = "method SetColumnGovernance not implemented"
	return nil, status.Error(codes.Unimplemented, msg)
}
// testEmbeddedByValue is an intentionally empty value-receiver method.
// RegisterCatalogServer calls it so that a nil embedded
// *UnimplementedCatalogServer panics at registration time.
func (UnimplementedCatalogServer) testEmbeddedByValue() {
	// Intentionally empty.
}
// UnsafeCatalogServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to CatalogServer will
// result in compilation errors.
type UnsafeCatalogServer interface {
// mustEmbedUnimplementedCatalogServer is the interface's only method;
// it is unexported.
mustEmbedUnimplementedCatalogServer()
}
// RegisterCatalogServer attaches srv to s as the implementation of the
// Catalog service described by Catalog_ServiceDesc.
func RegisterCatalogServer(s grpc.ServiceRegistrar, srv CatalogServer) {
	// Probe for the by-value embedding marker. If UnimplementedCatalogServer
	// was embedded by pointer and that pointer is nil, the call below panics
	// now, at initialization, rather than at runtime when an unimplemented
	// method is eventually invoked.
	type embedProbe interface{ testEmbeddedByValue() }
	if probe, hasProbe := srv.(embedProbe); hasProbe {
		probe.testEmbeddedByValue()
	}
	s.RegisterService(&Catalog_ServiceDesc, srv)
}
// _Catalog_RegisterDataset_Handler decodes a RegisterDatasetRequest via dec and
// dispatches it to CatalogServer.RegisterDataset, going through interceptor
// when one is configured.
func _Catalog_RegisterDataset_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(RegisterDatasetRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).RegisterDataset(ctx, msg.(*RegisterDatasetRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_RegisterDataset_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).RegisterDataset(ctx, request)
}
// _Catalog_DropDataset_Handler decodes a DropDatasetRequest via dec and
// dispatches it to CatalogServer.DropDataset, going through interceptor when
// one is configured.
func _Catalog_DropDataset_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(DropDatasetRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).DropDataset(ctx, msg.(*DropDatasetRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_DropDataset_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).DropDataset(ctx, request)
}
// _Catalog_UndeleteDataset_Handler decodes an UndeleteDatasetRequest via dec
// and dispatches it to CatalogServer.UndeleteDataset, going through
// interceptor when one is configured.
func _Catalog_UndeleteDataset_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(UndeleteDatasetRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).UndeleteDataset(ctx, msg.(*UndeleteDatasetRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_UndeleteDataset_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).UndeleteDataset(ctx, request)
}
// _Catalog_ListDatasets_Handler decodes a ListDatasetsRequest via dec and
// dispatches it to CatalogServer.ListDatasets, going through interceptor when
// one is configured.
func _Catalog_ListDatasets_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(ListDatasetsRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).ListDatasets(ctx, msg.(*ListDatasetsRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_ListDatasets_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).ListDatasets(ctx, request)
}
// _Catalog_RegisterTable_Handler decodes a RegisterTableRequest via dec and
// dispatches it to CatalogServer.RegisterTable, going through interceptor when
// one is configured.
func _Catalog_RegisterTable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(RegisterTableRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).RegisterTable(ctx, msg.(*RegisterTableRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_RegisterTable_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).RegisterTable(ctx, request)
}
// _Catalog_DropTable_Handler decodes a DropTableRequest via dec and dispatches
// it to CatalogServer.DropTable, going through interceptor when one is
// configured.
func _Catalog_DropTable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(DropTableRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).DropTable(ctx, msg.(*DropTableRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_DropTable_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).DropTable(ctx, request)
}
// _Catalog_ListTables_Handler decodes a ListTablesRequest via dec and
// dispatches it to CatalogServer.ListTables, going through interceptor when
// one is configured.
func _Catalog_ListTables_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(ListTablesRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).ListTables(ctx, msg.(*ListTablesRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_ListTables_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).ListTables(ctx, request)
}
// _Catalog_DescribeTable_Handler decodes a DescribeTableRequest via dec and
// dispatches it to CatalogServer.DescribeTable, going through interceptor when
// one is configured.
func _Catalog_DescribeTable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(DescribeTableRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).DescribeTable(ctx, msg.(*DescribeTableRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_DescribeTable_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).DescribeTable(ctx, request)
}
// _Catalog_InsertRows_Handler decodes an InsertRowsRequest via dec and
// dispatches it to CatalogServer.InsertRows, going through interceptor when
// one is configured.
func _Catalog_InsertRows_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(InsertRowsRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).InsertRows(ctx, msg.(*InsertRowsRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_InsertRows_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).InsertRows(ctx, request)
}
// _Catalog_ListRows_Handler decodes a ListRowsRequest via dec and dispatches
// it to CatalogServer.ListRows, going through interceptor when one is
// configured.
func _Catalog_ListRows_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(ListRowsRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).ListRows(ctx, msg.(*ListRowsRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_ListRows_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).ListRows(ctx, request)
}
// _Catalog_ListRoutines_Handler decodes a ListRoutinesRequest via dec and
// dispatches it to CatalogServer.ListRoutines, going through interceptor when
// one is configured.
func _Catalog_ListRoutines_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(ListRoutinesRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).ListRoutines(ctx, msg.(*ListRoutinesRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_ListRoutines_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).ListRoutines(ctx, request)
}
// _Catalog_GetRoutine_Handler decodes a GetRoutineRequest via dec and
// dispatches it to CatalogServer.GetRoutine, going through interceptor when
// one is configured.
func _Catalog_GetRoutine_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(GetRoutineRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).GetRoutine(ctx, msg.(*GetRoutineRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_GetRoutine_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).GetRoutine(ctx, request)
}
// _Catalog_UpsertRoutine_Handler decodes an UpsertRoutineRequest via dec and
// dispatches it to CatalogServer.UpsertRoutine, going through interceptor when
// one is configured.
func _Catalog_UpsertRoutine_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(UpsertRoutineRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).UpsertRoutine(ctx, msg.(*UpsertRoutineRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_UpsertRoutine_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).UpsertRoutine(ctx, request)
}
// _Catalog_DeleteRoutine_Handler decodes a DeleteRoutineRequest via dec and
// dispatches it to CatalogServer.DeleteRoutine, going through interceptor when
// one is configured.
func _Catalog_DeleteRoutine_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(DeleteRoutineRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).DeleteRoutine(ctx, msg.(*DeleteRoutineRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_DeleteRoutine_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).DeleteRoutine(ctx, request)
}
// _Catalog_UpsertRowAccessPolicy_Handler decodes an
// UpsertRowAccessPolicyRequest via dec and dispatches it to
// CatalogServer.UpsertRowAccessPolicy, going through interceptor when one is
// configured.
func _Catalog_UpsertRowAccessPolicy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(UpsertRowAccessPolicyRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).UpsertRowAccessPolicy(ctx, msg.(*UpsertRowAccessPolicyRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_UpsertRowAccessPolicy_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).UpsertRowAccessPolicy(ctx, request)
}
// _Catalog_DeleteRowAccessPolicy_Handler decodes a
// DeleteRowAccessPolicyRequest via dec and dispatches it to
// CatalogServer.DeleteRowAccessPolicy, going through interceptor when one is
// configured.
func _Catalog_DeleteRowAccessPolicy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(DeleteRowAccessPolicyRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).DeleteRowAccessPolicy(ctx, msg.(*DeleteRowAccessPolicyRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_DeleteRowAccessPolicy_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).DeleteRowAccessPolicy(ctx, request)
}
// _Catalog_ListRowAccessPolicies_Handler decodes a
// ListRowAccessPoliciesRequest via dec and dispatches it to
// CatalogServer.ListRowAccessPolicies, going through interceptor when one is
// configured.
func _Catalog_ListRowAccessPolicies_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(ListRowAccessPoliciesRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).ListRowAccessPolicies(ctx, msg.(*ListRowAccessPoliciesRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_ListRowAccessPolicies_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).ListRowAccessPolicies(ctx, request)
}
// _Catalog_SetColumnGovernance_Handler decodes a SetColumnGovernanceRequest
// via dec and dispatches it to CatalogServer.SetColumnGovernance, going
// through interceptor when one is configured.
func _Catalog_SetColumnGovernance_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(SetColumnGovernanceRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(CatalogServer).SetColumnGovernance(ctx, msg.(*SetColumnGovernanceRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Catalog_SetColumnGovernance_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(CatalogServer).SetColumnGovernance(ctx, request)
}
// Catalog_ServiceDesc is the grpc.ServiceDesc for Catalog service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy).
//
// RegisterCatalogServer passes a pointer to this descriptor to
// grpc.ServiceRegistrar.RegisterService.
var Catalog_ServiceDesc = grpc.ServiceDesc{
ServiceName: "bigquery_emulator.v1.Catalog",
HandlerType: (*CatalogServer)(nil),
// Unary RPCs: each entry pairs an RPC method name with its generated
// _Catalog_<Method>_Handler function.
Methods: []grpc.MethodDesc{
{
MethodName: "RegisterDataset",
Handler: _Catalog_RegisterDataset_Handler,
},
{
MethodName: "DropDataset",
Handler: _Catalog_DropDataset_Handler,
},
{
MethodName: "UndeleteDataset",
Handler: _Catalog_UndeleteDataset_Handler,
},
{
MethodName: "ListDatasets",
Handler: _Catalog_ListDatasets_Handler,
},
{
MethodName: "RegisterTable",
Handler: _Catalog_RegisterTable_Handler,
},
{
MethodName: "DropTable",
Handler: _Catalog_DropTable_Handler,
},
{
MethodName: "ListTables",
Handler: _Catalog_ListTables_Handler,
},
{
MethodName: "DescribeTable",
Handler: _Catalog_DescribeTable_Handler,
},
{
MethodName: "InsertRows",
Handler: _Catalog_InsertRows_Handler,
},
{
MethodName: "ListRows",
Handler: _Catalog_ListRows_Handler,
},
{
MethodName: "ListRoutines",
Handler: _Catalog_ListRoutines_Handler,
},
{
MethodName: "GetRoutine",
Handler: _Catalog_GetRoutine_Handler,
},
{
MethodName: "UpsertRoutine",
Handler: _Catalog_UpsertRoutine_Handler,
},
{
MethodName: "DeleteRoutine",
Handler: _Catalog_DeleteRoutine_Handler,
},
{
MethodName: "UpsertRowAccessPolicy",
Handler: _Catalog_UpsertRowAccessPolicy_Handler,
},
{
MethodName: "DeleteRowAccessPolicy",
Handler: _Catalog_DeleteRowAccessPolicy_Handler,
},
{
MethodName: "ListRowAccessPolicies",
Handler: _Catalog_ListRowAccessPolicies_Handler,
},
{
MethodName: "SetColumnGovernance",
Handler: _Catalog_SetColumnGovernance_Handler,
},
},
// No streaming RPCs are declared for Catalog.
Streams: []grpc.StreamDesc{},
Metadata: "emulator.proto",
}
// Fully-qualified gRPC method names for the Query service, in
// "/<service>/<method>" form. The generated client passes them to
// Invoke/NewStream and the unary handler reports them in
// grpc.UnaryServerInfo.FullMethod.
const (
Query_DryRun_FullMethodName = "/bigquery_emulator.v1.Query/DryRun"
Query_ExecuteQuery_FullMethodName = "/bigquery_emulator.v1.Query/ExecuteQuery"
)
// QueryClient is the client API for Query service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type QueryClient interface {
// DryRun performs the unary Query/DryRun RPC and returns its single response.
DryRun(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*DryRunResponse, error)
// ExecuteQuery starts the server-streaming Query/ExecuteQuery RPC; the
// returned stream yields QueryResultRow messages.
ExecuteQuery(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[QueryResultRow], error)
}
// queryClient is the generated QueryClient implementation returned by
// NewQueryClient.
type queryClient struct {
// cc is the connection every RPC on this client is issued through.
cc grpc.ClientConnInterface
}
// NewQueryClient returns a QueryClient that issues its RPCs over cc.
func NewQueryClient(cc grpc.ClientConnInterface) QueryClient {
	client := &queryClient{cc: cc}
	return client
}
// DryRun invokes the unary Query/DryRun RPC with in and returns the decoded
// DryRunResponse. grpc.StaticMethod() is placed ahead of the caller-supplied
// opts. On error the response is nil.
func (c *queryClient) DryRun(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*DryRunResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(DryRunResponse)
	if err := c.cc.Invoke(ctx, Query_DryRun_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// ExecuteQuery opens the server-streaming Query/ExecuteQuery RPC, sends the
// single request message in, closes the send side, and returns the stream the
// caller reads QueryResultRow messages from. Any failure while opening the
// stream, sending, or closing the send side is returned with a nil stream.
func (c *queryClient) ExecuteQuery(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[QueryResultRow], error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	// Streams[0] is the ExecuteQuery entry of Query_ServiceDesc.
	rawStream, err := c.cc.NewStream(ctx, &Query_ServiceDesc.Streams[0], Query_ExecuteQuery_FullMethodName, callOpts...)
	if err != nil {
		return nil, err
	}
	typed := &grpc.GenericClientStream[QueryRequest, QueryResultRow]{ClientStream: rawStream}
	if sendErr := typed.ClientStream.SendMsg(in); sendErr != nil {
		return nil, sendErr
	}
	if closeErr := typed.ClientStream.CloseSend(); closeErr != nil {
		return nil, closeErr
	}
	return typed, nil
}
// Query_ExecuteQueryClient is an alias for the generic client stream type
// returned by QueryClient.ExecuteQuery. It is provided for backwards
// compatibility with existing code that references the prior non-generic
// stream type by name.
type Query_ExecuteQueryClient = grpc.ServerStreamingClient[QueryResultRow]
// QueryServer is the server API for Query service.
// All implementations should embed UnimplementedQueryServer
// for forward compatibility.
type QueryServer interface {
// DryRun handles the unary Query/DryRun RPC.
DryRun(context.Context, *QueryRequest) (*DryRunResponse, error)
// ExecuteQuery handles the server-streaming Query/ExecuteQuery RPC; the
// stream argument is typed to carry QueryResultRow messages.
ExecuteQuery(*QueryRequest, grpc.ServerStreamingServer[QueryResultRow]) error
}
// UnimplementedQueryServer should be embedded to have
// forward compatible implementations. Its methods all return
// codes.Unimplemented.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedQueryServer struct{}
// DryRun is the fallback stub for the DryRun RPC; it ignores its arguments and
// always reports codes.Unimplemented.
func (UnimplementedQueryServer) DryRun(context.Context, *QueryRequest) (*DryRunResponse, error) {
	notImplemented := status.Error(codes.Unimplemented, "method DryRun not implemented")
	return nil, notImplemented
}
// ExecuteQuery is the fallback stub for the streaming ExecuteQuery RPC; it
// ignores its arguments and always reports codes.Unimplemented.
func (UnimplementedQueryServer) ExecuteQuery(*QueryRequest, grpc.ServerStreamingServer[QueryResultRow]) error {
	notImplemented := status.Error(codes.Unimplemented, "method ExecuteQuery not implemented")
	return notImplemented
}
// testEmbeddedByValue is a no-op probe with a value receiver.
// RegisterQueryServer calls it through an interface assertion so that a server
// embedding a nil *UnimplementedQueryServer panics at registration time
// instead of later, when an unimplemented RPC is invoked.
func (UnimplementedQueryServer) testEmbeddedByValue() {}
// UnsafeQueryServer may be embedded to opt out of forward compatibility for
// this service. Use of this interface is not recommended: once a server opts
// out, any method later added to QueryServer results in a compilation error
// until the server implements it.
type UnsafeQueryServer interface {
// mustEmbedUnimplementedQueryServer is the unexported marker method that
// makes up this interface's entire method set.
mustEmbedUnimplementedQueryServer()
}
// RegisterQueryServer attaches srv to s as the implementation of the Query
// service described by Query_ServiceDesc.
func RegisterQueryServer(s grpc.ServiceRegistrar, srv QueryServer) {
	// Probe for the by-value embedding marker. If UnimplementedQueryServer
	// was embedded by pointer and that pointer is nil, the call below panics
	// now, at initialization, rather than at runtime when an unimplemented
	// method is eventually invoked.
	type embedProbe interface{ testEmbeddedByValue() }
	if probe, hasProbe := srv.(embedProbe); hasProbe {
		probe.testEmbeddedByValue()
	}
	s.RegisterService(&Query_ServiceDesc, srv)
}
// _Query_DryRun_Handler decodes a QueryRequest via dec and dispatches it to
// QueryServer.DryRun, going through interceptor when one is configured.
func _Query_DryRun_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(QueryRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(QueryServer).DryRun(ctx, msg.(*QueryRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: Query_DryRun_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(QueryServer).DryRun(ctx, request)
}
// _Query_ExecuteQuery_Handler receives the single QueryRequest from stream and
// hands it, together with a typed wrapper around stream, to
// QueryServer.ExecuteQuery. A receive failure is returned as-is.
func _Query_ExecuteQuery_Handler(srv interface{}, stream grpc.ServerStream) error {
	request := new(QueryRequest)
	if recvErr := stream.RecvMsg(request); recvErr != nil {
		return recvErr
	}
	rows := &grpc.GenericServerStream[QueryRequest, QueryResultRow]{ServerStream: stream}
	return srv.(QueryServer).ExecuteQuery(request, rows)
}
// Query_ExecuteQueryServer is an alias for the generic server stream type
// accepted by QueryServer.ExecuteQuery. It is provided for backwards
// compatibility with existing code that references the prior non-generic
// stream type by name.
type Query_ExecuteQueryServer = grpc.ServerStreamingServer[QueryResultRow]
// Query_ServiceDesc is the grpc.ServiceDesc for Query service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy).
//
// RegisterQueryServer passes a pointer to this descriptor to
// grpc.ServiceRegistrar.RegisterService.
var Query_ServiceDesc = grpc.ServiceDesc{
ServiceName: "bigquery_emulator.v1.Query",
HandlerType: (*QueryServer)(nil),
// Unary RPCs.
Methods: []grpc.MethodDesc{
{
MethodName: "DryRun",
Handler: _Query_DryRun_Handler,
},
},
// Streaming RPCs. queryClient.ExecuteQuery addresses this slice by
// position (Streams[0]), so ExecuteQuery must stay the first entry.
Streams: []grpc.StreamDesc{
{
StreamName: "ExecuteQuery",
Handler: _Query_ExecuteQuery_Handler,
ServerStreams: true,
},
},
Metadata: "emulator.proto",
}
// Fully-qualified gRPC method names for the SqlTools service, in
// "/<service>/<method>" form. The generated client passes them to Invoke and
// the unary handlers report them in grpc.UnaryServerInfo.FullMethod.
const (
SqlTools_Format_FullMethodName = "/bigquery_emulator.v1.SqlTools/Format"
SqlTools_Parse_FullMethodName = "/bigquery_emulator.v1.SqlTools/Parse"
SqlTools_Tokenize_FullMethodName = "/bigquery_emulator.v1.SqlTools/Tokenize"
SqlTools_Complete_FullMethodName = "/bigquery_emulator.v1.SqlTools/Complete"
SqlTools_Analyze_FullMethodName = "/bigquery_emulator.v1.SqlTools/Analyze"
)
// SqlToolsClient is the client API for SqlTools service. Every method is a
// unary RPC.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type SqlToolsClient interface {
// Format performs the SqlTools/Format RPC.
Format(ctx context.Context, in *FormatSqlRequest, opts ...grpc.CallOption) (*FormatSqlResponse, error)
// Parse performs the SqlTools/Parse RPC.
Parse(ctx context.Context, in *ParseSqlRequest, opts ...grpc.CallOption) (*ParseSqlResponse, error)
// Tokenize performs the SqlTools/Tokenize RPC.
Tokenize(ctx context.Context, in *TokenizeSqlRequest, opts ...grpc.CallOption) (*TokenizeSqlResponse, error)
// Complete performs the SqlTools/Complete RPC.
Complete(ctx context.Context, in *CompleteSqlRequest, opts ...grpc.CallOption) (*CompleteSqlResponse, error)
// Analyze performs the SqlTools/Analyze RPC.
Analyze(ctx context.Context, in *AnalyzeSqlRequest, opts ...grpc.CallOption) (*AnalyzeSqlResponse, error)
}
// sqlToolsClient is the generated SqlToolsClient implementation returned by
// NewSqlToolsClient.
type sqlToolsClient struct {
// cc is the connection every RPC on this client is issued through.
cc grpc.ClientConnInterface
}
// NewSqlToolsClient returns a SqlToolsClient that issues its RPCs over cc.
func NewSqlToolsClient(cc grpc.ClientConnInterface) SqlToolsClient {
	client := &sqlToolsClient{cc: cc}
	return client
}
// Format invokes the unary SqlTools/Format RPC with in and returns the decoded
// FormatSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied opts. On error the response is nil.
func (c *sqlToolsClient) Format(ctx context.Context, in *FormatSqlRequest, opts ...grpc.CallOption) (*FormatSqlResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(FormatSqlResponse)
	if err := c.cc.Invoke(ctx, SqlTools_Format_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// Parse invokes the unary SqlTools/Parse RPC with in and returns the decoded
// ParseSqlResponse. grpc.StaticMethod() is placed ahead of the caller-supplied
// opts. On error the response is nil.
func (c *sqlToolsClient) Parse(ctx context.Context, in *ParseSqlRequest, opts ...grpc.CallOption) (*ParseSqlResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(ParseSqlResponse)
	if err := c.cc.Invoke(ctx, SqlTools_Parse_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// Tokenize invokes the unary SqlTools/Tokenize RPC with in and returns the
// decoded TokenizeSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied opts. On error the response is nil.
func (c *sqlToolsClient) Tokenize(ctx context.Context, in *TokenizeSqlRequest, opts ...grpc.CallOption) (*TokenizeSqlResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(TokenizeSqlResponse)
	if err := c.cc.Invoke(ctx, SqlTools_Tokenize_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// Complete invokes the unary SqlTools/Complete RPC with in and returns the
// decoded CompleteSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied opts. On error the response is nil.
func (c *sqlToolsClient) Complete(ctx context.Context, in *CompleteSqlRequest, opts ...grpc.CallOption) (*CompleteSqlResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(CompleteSqlResponse)
	if err := c.cc.Invoke(ctx, SqlTools_Complete_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// Analyze invokes the unary SqlTools/Analyze RPC with in and returns the
// decoded AnalyzeSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied opts. On error the response is nil.
func (c *sqlToolsClient) Analyze(ctx context.Context, in *AnalyzeSqlRequest, opts ...grpc.CallOption) (*AnalyzeSqlResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(AnalyzeSqlResponse)
	if err := c.cc.Invoke(ctx, SqlTools_Analyze_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// SqlToolsServer is the server API for SqlTools service.
// All implementations should embed UnimplementedSqlToolsServer
// for forward compatibility.
type SqlToolsServer interface {
// Format handles the unary SqlTools/Format RPC.
Format(context.Context, *FormatSqlRequest) (*FormatSqlResponse, error)
// Parse handles the unary SqlTools/Parse RPC.
Parse(context.Context, *ParseSqlRequest) (*ParseSqlResponse, error)
// Tokenize handles the unary SqlTools/Tokenize RPC.
Tokenize(context.Context, *TokenizeSqlRequest) (*TokenizeSqlResponse, error)
// Complete handles the unary SqlTools/Complete RPC.
Complete(context.Context, *CompleteSqlRequest) (*CompleteSqlResponse, error)
// Analyze handles the unary SqlTools/Analyze RPC.
Analyze(context.Context, *AnalyzeSqlRequest) (*AnalyzeSqlResponse, error)
}
// UnimplementedSqlToolsServer should be embedded to have
// forward compatible implementations. Its methods all return
// codes.Unimplemented.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedSqlToolsServer struct{}
// Format is the fallback stub for the Format RPC; it ignores its arguments and
// always reports codes.Unimplemented.
func (UnimplementedSqlToolsServer) Format(context.Context, *FormatSqlRequest) (*FormatSqlResponse, error) {
	notImplemented := status.Error(codes.Unimplemented, "method Format not implemented")
	return nil, notImplemented
}
// Parse is the fallback stub for the Parse RPC; it ignores its arguments and
// always reports codes.Unimplemented.
func (UnimplementedSqlToolsServer) Parse(context.Context, *ParseSqlRequest) (*ParseSqlResponse, error) {
	notImplemented := status.Error(codes.Unimplemented, "method Parse not implemented")
	return nil, notImplemented
}
// Tokenize is the fallback stub for the Tokenize RPC; it ignores its arguments
// and always reports codes.Unimplemented.
func (UnimplementedSqlToolsServer) Tokenize(context.Context, *TokenizeSqlRequest) (*TokenizeSqlResponse, error) {
	notImplemented := status.Error(codes.Unimplemented, "method Tokenize not implemented")
	return nil, notImplemented
}
// Complete is the fallback stub for the Complete RPC; it ignores its arguments
// and always reports codes.Unimplemented.
func (UnimplementedSqlToolsServer) Complete(context.Context, *CompleteSqlRequest) (*CompleteSqlResponse, error) {
	notImplemented := status.Error(codes.Unimplemented, "method Complete not implemented")
	return nil, notImplemented
}
// Analyze is the fallback stub for the Analyze RPC; it ignores its arguments
// and always reports codes.Unimplemented.
func (UnimplementedSqlToolsServer) Analyze(context.Context, *AnalyzeSqlRequest) (*AnalyzeSqlResponse, error) {
	notImplemented := status.Error(codes.Unimplemented, "method Analyze not implemented")
	return nil, notImplemented
}
// testEmbeddedByValue is a no-op probe with a value receiver.
// RegisterSqlToolsServer calls it through an interface assertion so that a
// server embedding a nil *UnimplementedSqlToolsServer panics at registration
// time instead of later, when an unimplemented RPC is invoked.
func (UnimplementedSqlToolsServer) testEmbeddedByValue() {}
// UnsafeSqlToolsServer may be embedded to opt out of forward compatibility for
// this service. Use of this interface is not recommended: once a server opts
// out, any method later added to SqlToolsServer results in a compilation error
// until the server implements it.
type UnsafeSqlToolsServer interface {
// mustEmbedUnimplementedSqlToolsServer is the unexported marker method that
// makes up this interface's entire method set.
mustEmbedUnimplementedSqlToolsServer()
}
// RegisterSqlToolsServer attaches srv to s as the implementation of the
// SqlTools service described by SqlTools_ServiceDesc.
func RegisterSqlToolsServer(s grpc.ServiceRegistrar, srv SqlToolsServer) {
	// Probe for the by-value embedding marker. If UnimplementedSqlToolsServer
	// was embedded by pointer and that pointer is nil, the call below panics
	// now, at initialization, rather than at runtime when an unimplemented
	// method is eventually invoked.
	type embedProbe interface{ testEmbeddedByValue() }
	if probe, hasProbe := srv.(embedProbe); hasProbe {
		probe.testEmbeddedByValue()
	}
	s.RegisterService(&SqlTools_ServiceDesc, srv)
}
// _SqlTools_Format_Handler decodes a FormatSqlRequest via dec and dispatches
// it to SqlToolsServer.Format, going through interceptor when one is
// configured.
func _SqlTools_Format_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(FormatSqlRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(SqlToolsServer).Format(ctx, msg.(*FormatSqlRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: SqlTools_Format_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(SqlToolsServer).Format(ctx, request)
}
// _SqlTools_Parse_Handler decodes a ParseSqlRequest via dec and dispatches it
// to SqlToolsServer.Parse, going through interceptor when one is configured.
func _SqlTools_Parse_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(ParseSqlRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(SqlToolsServer).Parse(ctx, msg.(*ParseSqlRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: SqlTools_Parse_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(SqlToolsServer).Parse(ctx, request)
}
// _SqlTools_Tokenize_Handler decodes a TokenizeSqlRequest via dec and
// dispatches it to SqlToolsServer.Tokenize, going through interceptor when one
// is configured.
func _SqlTools_Tokenize_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(TokenizeSqlRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(SqlToolsServer).Tokenize(ctx, msg.(*TokenizeSqlRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: SqlTools_Tokenize_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(SqlToolsServer).Tokenize(ctx, request)
}
// _SqlTools_Complete_Handler decodes a CompleteSqlRequest via dec and
// dispatches it to SqlToolsServer.Complete, going through interceptor when one
// is configured.
func _SqlTools_Complete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(CompleteSqlRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(SqlToolsServer).Complete(ctx, msg.(*CompleteSqlRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: SqlTools_Complete_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(SqlToolsServer).Complete(ctx, request)
}
// _SqlTools_Analyze_Handler decodes an AnalyzeSqlRequest via dec and
// dispatches it to SqlToolsServer.Analyze, going through interceptor when one
// is configured.
func _SqlTools_Analyze_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	request := new(AnalyzeSqlRequest)
	if decodeErr := dec(request); decodeErr != nil {
		return nil, decodeErr
	}
	if interceptor != nil {
		// The interceptor decides whether and when to run the real method.
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(SqlToolsServer).Analyze(ctx, msg.(*AnalyzeSqlRequest))
		}
		callInfo := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: SqlTools_Analyze_FullMethodName,
		}
		return interceptor(ctx, request, callInfo, invoke)
	}
	return srv.(SqlToolsServer).Analyze(ctx, request)
}
// SqlTools_ServiceDesc is the grpc.ServiceDesc for SqlTools service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy).
//
// RegisterSqlToolsServer passes a pointer to this descriptor to
// grpc.ServiceRegistrar.RegisterService.
var SqlTools_ServiceDesc = grpc.ServiceDesc{
ServiceName: "bigquery_emulator.v1.SqlTools",
HandlerType: (*SqlToolsServer)(nil),
// Unary RPCs: each entry pairs an RPC method name with its generated
// _SqlTools_<Method>_Handler function.
Methods: []grpc.MethodDesc{
{
MethodName: "Format",
Handler: _SqlTools_Format_Handler,
},
{
MethodName: "Parse",
Handler: _SqlTools_Parse_Handler,
},
{
MethodName: "Tokenize",
Handler: _SqlTools_Tokenize_Handler,
},
{
MethodName: "Complete",
Handler: _SqlTools_Complete_Handler,
},
{
MethodName: "Analyze",
Handler: _SqlTools_Analyze_Handler,
},
},
// No streaming RPCs are declared for SqlTools.
Streams: []grpc.StreamDesc{},
Metadata: "emulator.proto",
}
// storage_read.proto is the internal contract for the BigQuery Storage
// Read API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.list` reads against large
// tables into Storage Read RPCs against this service so the C++ engine
// can stream rows back without the entire result set living in the
// gateway's heap.
//
// The engine implements `CreateReadSession` and the streaming
// `ReadRows` reply, including per-column projection
// (`selected_fields`) and `row_restriction` pushdown, and the gateway
// is wired to it (with e2e coverage under `gateway/e2e/`). The shape
// here is the **simplified** Storage Read v1 contract: no Arrow/Avro
// projections (rows ride on the same `DataRow` cells that
// `Catalog.ListRows` already returns), no SplitReadStream RPC, no
// session liveness extension. Those are documented as "future" so
// the conformance harness can pin per-feature gaps.
//
// Code generation:
// - Go: `task proto:gen` writes
// `gateway/enginepb/storage_read.{pb,grpc.pb}.go`. Same
// plugin pipeline as `emulator.proto`.
// - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
// `proto/BUILD.bazel` emit
// `storage_read.{pb,grpc.pb}.{h,cc}` into bazel-bin.
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.11
// protoc (unknown)
// source: storage_read.proto
package enginepb
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
// Version guards between this generated file and the linked protoimpl
// runtime; both results are discarded, only the checks themselves matter.
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// CreateReadSessionRequest carries the inputs for creating a read session:
// the parent project path, the desired session shape, and an upper bound on
// the number of streams.
type CreateReadSessionRequest struct {
// state is protobuf runtime bookkeeping (see ProtoReflect and Reset).
state protoimpl.MessageState `protogen:"open.v1"`
// BigQuery resource path the session belongs to: `projects/{project_id}`.
// The gateway derives this from the REST URL path so different
// projects do not see each other's sessions.
Parent string `protobuf:"bytes,1,opt,name=parent,proto3" json:"parent,omitempty"`
// Session shape the caller wants to read. `read_session.table` names
// the table; `read_options.selected_fields` and
// `read_options.row_restriction` are validated here and applied when
// the streams are drained.
ReadSession *ReadSession `protobuf:"bytes,2,opt,name=read_session,json=readSession,proto3" json:"read_session,omitempty"`
// Maximum number of streams the caller is willing to drain in
// parallel. When unset or zero the engine returns one stream. The
// server may return fewer streams than requested depending on table
// size (each stream must cover at least one row when possible).
MaxStreamCount int32 `protobuf:"varint,3,opt,name=max_stream_count,json=maxStreamCount,proto3" json:"max_stream_count,omitempty"`
// unknownFields and sizeCache are protobuf runtime bookkeeping fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with the zero CreateReadSessionRequest and then stores
// this message's MessageInfo back into its message state.
func (x *CreateReadSessionRequest) Reset() {
	*x = CreateReadSessionRequest{}
	info := &file_storage_read_proto_msgTypes[0]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the message rendered by protoimpl.X.MessageStringOf.
func (x *CreateReadSessionRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it has no behavior of its own.
func (*CreateReadSessionRequest) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a non-nil receiver that
// is x's message state, with the MessageInfo stored into it first if it was
// not set yet; for a nil receiver it is whatever MessageInfo.MessageOf
// returns for x.
func (x *CreateReadSessionRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[0]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the result of file_storage_read_proto_rawDescGZIP
// together with the index path []int{0} for this message.
//
// Deprecated: Use CreateReadSessionRequest.ProtoReflect.Descriptor instead.
func (*CreateReadSessionRequest) Descriptor() ([]byte, []int) {
	rawDesc := file_storage_read_proto_rawDescGZIP()
	indexPath := []int{0}
	return rawDesc, indexPath
}
// GetParent returns Parent, or "" when the receiver is nil.
func (x *CreateReadSessionRequest) GetParent() string {
	if x == nil {
		return ""
	}
	return x.Parent
}
// GetReadSession returns ReadSession, or nil when the receiver is nil.
func (x *CreateReadSessionRequest) GetReadSession() *ReadSession {
	if x == nil {
		return nil
	}
	return x.ReadSession
}
// GetMaxStreamCount returns MaxStreamCount, or 0 when the receiver is nil.
func (x *CreateReadSessionRequest) GetMaxStreamCount() int32 {
	if x == nil {
		return 0
	}
	return x.MaxStreamCount
}
// ReadSession is the output message of StorageRead.CreateReadSession and
// is also carried inside CreateReadSessionRequest.read_session.
type ReadSession struct {
// Message state used by the protobuf runtime (see Reset / ProtoReflect).
state protoimpl.MessageState `protogen:"open.v1"`
// Server-assigned session id, of the form
// `projects/{project_id}/locations/{location}/sessions/{session_id}`.
// The gateway returns this in the REST `name` field so a follow-up
// `ReadRows` call can find the session again.
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
// Fully-qualified BigQuery table id the session is reading from:
// `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`.
// Set on the request side (caller specifies which table to read);
// echoed back on the response side so the caller does not have to
// re-derive it.
Table string `protobuf:"bytes,2,opt,name=table,proto3" json:"table,omitempty"`
// Schema of the rows the streams will emit. Populated by the engine
// from `Storage::GetSchema` so the caller can decode the cells the
// same way it decodes `tabledata.list` rows.
Schema *TableSchema `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"`
// Subset of fields to read; the engine validates the list at
// session creation and projects rows to these columns on ReadRows.
ReadOptions *ReadOptions `protobuf:"bytes,4,opt,name=read_options,json=readOptions,proto3" json:"read_options,omitempty"`
// Streams the caller can drain. Each one is independent and any
// single row in `table` appears in exactly one stream. The engine
// always returns a single stream with the full table.
// NOTE(review): "always returns a single stream" appears to conflict
// with CreateReadSessionRequest.max_stream_count and the
// SplitReadStream RPC, both of which imply more than one stream.
// Confirm against the engine and update storage_read.proto.
Streams []*ReadStream `protobuf:"bytes,5,rep,name=streams,proto3" json:"streams,omitempty"`
// Runtime bookkeeping fields typed by protoimpl; not part of the
// message's declared proto fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with an empty ReadSession and stores message info
// entry 1 back into its message state.
func (x *ReadSession) Reset() {
	*x = ReadSession{}
	info := &file_storage_read_proto_msgTypes[1]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ReadSession) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it performs no work.
func (*ReadSession) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a nil receiver the
// view comes from the message info; otherwise the message state is used,
// with its message info stored first if it has not been set yet.
func (x *ReadSession) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[1]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the gzipped raw file descriptor and the index path
// ([1]) of this message within it.
//
// Deprecated: Use ReadSession.ProtoReflect.Descriptor instead.
func (*ReadSession) Descriptor() ([]byte, []int) {
	gz := file_storage_read_proto_rawDescGZIP()
	return gz, []int{1}
}
// GetName returns Name, or "" when the receiver is nil.
func (x *ReadSession) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}
// GetTable returns Table, or "" when the receiver is nil.
func (x *ReadSession) GetTable() string {
	if x == nil {
		return ""
	}
	return x.Table
}
// GetSchema returns Schema, or nil when the receiver is nil.
func (x *ReadSession) GetSchema() *TableSchema {
	if x == nil {
		return nil
	}
	return x.Schema
}
// GetReadOptions returns ReadOptions, or nil when the receiver is nil.
func (x *ReadSession) GetReadOptions() *ReadOptions {
	if x == nil {
		return nil
	}
	return x.ReadOptions
}
// GetStreams returns Streams, or nil when the receiver is nil.
func (x *ReadSession) GetStreams() []*ReadStream {
	if x == nil {
		return nil
	}
	return x.Streams
}
// ReadOptions holds the column selection and row filter attached to a
// ReadSession (its `read_options` field).
type ReadOptions struct {
// Message state used by the protobuf runtime (see Reset / ProtoReflect).
state protoimpl.MessageState `protogen:"open.v1"`
// Names of the columns the caller wants returned. Empty / unset
// means "all columns". Non-empty lists are validated at session
// creation and rows are projected to exactly these columns.
SelectedFields []string `protobuf:"bytes,1,rep,name=selected_fields,json=selectedFields,proto3" json:"selected_fields,omitempty"`
// SQL-shaped filter expression the engine pushes down. The
// restriction is analyzed with GoogleSQL against the table schema
// and transpiled into a DuckDB `WHERE` clause; unsupported shapes
// reject at CreateReadSession with INVALID_ARGUMENT. The predicate
// is applied before `offset`, matching BigQuery's documented
// semantics.
RowRestriction string `protobuf:"bytes,2,opt,name=row_restriction,json=rowRestriction,proto3" json:"row_restriction,omitempty"`
// Runtime bookkeeping fields typed by protoimpl; not part of the
// message's declared proto fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with an empty ReadOptions and stores message info
// entry 2 back into its message state.
func (x *ReadOptions) Reset() {
	*x = ReadOptions{}
	info := &file_storage_read_proto_msgTypes[2]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ReadOptions) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it performs no work.
func (*ReadOptions) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a nil receiver the
// view comes from the message info; otherwise the message state is used,
// with its message info stored first if it has not been set yet.
func (x *ReadOptions) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[2]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the gzipped raw file descriptor and the index path
// ([2]) of this message within it.
//
// Deprecated: Use ReadOptions.ProtoReflect.Descriptor instead.
func (*ReadOptions) Descriptor() ([]byte, []int) {
	gz := file_storage_read_proto_rawDescGZIP()
	return gz, []int{2}
}
// GetSelectedFields returns SelectedFields, or nil when the receiver is nil.
func (x *ReadOptions) GetSelectedFields() []string {
	if x == nil {
		return nil
	}
	return x.SelectedFields
}
// GetRowRestriction returns RowRestriction, or "" when the receiver is nil.
func (x *ReadOptions) GetRowRestriction() string {
	if x == nil {
		return ""
	}
	return x.RowRestriction
}
// ReadStream names a single stream. It appears in ReadSession.streams and
// in both fields of SplitReadStreamResponse.
type ReadStream struct {
// Message state used by the protobuf runtime (see Reset / ProtoReflect).
state protoimpl.MessageState `protogen:"open.v1"`
// Server-assigned stream id of the form
// `{session_name}/streams/{stream_id}`. The caller passes this
// back to `ReadRows` to drain rows off this stream.
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
// Runtime bookkeeping fields typed by protoimpl; not part of the
// message's declared proto fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with an empty ReadStream and stores message info
// entry 3 back into its message state.
func (x *ReadStream) Reset() {
	*x = ReadStream{}
	info := &file_storage_read_proto_msgTypes[3]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ReadStream) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it performs no work.
func (*ReadStream) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a nil receiver the
// view comes from the message info; otherwise the message state is used,
// with its message info stored first if it has not been set yet.
func (x *ReadStream) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[3]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the gzipped raw file descriptor and the index path
// ([3]) of this message within it.
//
// Deprecated: Use ReadStream.ProtoReflect.Descriptor instead.
func (*ReadStream) Descriptor() ([]byte, []int) {
	gz := file_storage_read_proto_rawDescGZIP()
	return gz, []int{3}
}
// GetName returns Name, or "" when the receiver is nil.
func (x *ReadStream) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}
// ReadRowsRequest is the input message of the StorageRead.ReadRows RPC.
type ReadRowsRequest struct {
// Message state used by the protobuf runtime (see Reset / ProtoReflect).
state protoimpl.MessageState `protogen:"open.v1"`
// Stream id returned by `CreateReadSession`. The engine looks the
// session up by stripping the trailing `/streams/{id}` and finding
// the matching `ReadSession`.
ReadStream string `protobuf:"bytes,1,opt,name=read_stream,json=readStream,proto3" json:"read_stream,omitempty"`
// Row offset to resume from. The gateway uses this to re-attach to
// a stream after a transient failure without re-driving rows the
// client already received. The engine honors the offset (counted
// over the post-filter row stream).
Offset int64 `protobuf:"varint,2,opt,name=offset,proto3" json:"offset,omitempty"`
// Runtime bookkeeping fields typed by protoimpl; not part of the
// message's declared proto fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with an empty ReadRowsRequest and stores message
// info entry 4 back into its message state.
func (x *ReadRowsRequest) Reset() {
	*x = ReadRowsRequest{}
	info := &file_storage_read_proto_msgTypes[4]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ReadRowsRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it performs no work.
func (*ReadRowsRequest) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a nil receiver the
// view comes from the message info; otherwise the message state is used,
// with its message info stored first if it has not been set yet.
func (x *ReadRowsRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[4]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the gzipped raw file descriptor and the index path
// ([4]) of this message within it.
//
// Deprecated: Use ReadRowsRequest.ProtoReflect.Descriptor instead.
func (*ReadRowsRequest) Descriptor() ([]byte, []int) {
	gz := file_storage_read_proto_rawDescGZIP()
	return gz, []int{4}
}
// GetReadStream returns ReadStream, or "" when the receiver is nil.
func (x *ReadRowsRequest) GetReadStream() string {
	if x == nil {
		return ""
	}
	return x.ReadStream
}
// GetOffset returns Offset, or 0 when the receiver is nil.
func (x *ReadRowsRequest) GetOffset() int64 {
	if x == nil {
		return 0
	}
	return x.Offset
}
// ReadRowsResponse is the message type streamed back by the
// StorageRead.ReadRows RPC.
type ReadRowsResponse struct {
// Message state used by the protobuf runtime (see Reset / ProtoReflect).
state protoimpl.MessageState `protogen:"open.v1"`
// Rows on this page of the stream. The engine streams rows in
// fixed-size pages (see `kReadRowsBatchSize` in
// `frontend/handlers/storage_read.cc`).
Rows []*DataRow `protobuf:"bytes,1,rep,name=rows,proto3" json:"rows,omitempty"`
// Number of rows in this page. Convenience field so callers do not
// have to count `rows.size()` on every reply.
RowCount int64 `protobuf:"varint,2,opt,name=row_count,json=rowCount,proto3" json:"row_count,omitempty"`
// Runtime bookkeeping fields typed by protoimpl; not part of the
// message's declared proto fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with an empty ReadRowsResponse and stores message
// info entry 5 back into its message state.
func (x *ReadRowsResponse) Reset() {
	*x = ReadRowsResponse{}
	info := &file_storage_read_proto_msgTypes[5]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ReadRowsResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it performs no work.
func (*ReadRowsResponse) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a nil receiver the
// view comes from the message info; otherwise the message state is used,
// with its message info stored first if it has not been set yet.
func (x *ReadRowsResponse) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[5]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the gzipped raw file descriptor and the index path
// ([5]) of this message within it.
//
// Deprecated: Use ReadRowsResponse.ProtoReflect.Descriptor instead.
func (*ReadRowsResponse) Descriptor() ([]byte, []int) {
	gz := file_storage_read_proto_rawDescGZIP()
	return gz, []int{5}
}
// GetRows returns Rows, or nil when the receiver is nil.
func (x *ReadRowsResponse) GetRows() []*DataRow {
	if x == nil {
		return nil
	}
	return x.Rows
}
// GetRowCount returns RowCount, or 0 when the receiver is nil.
func (x *ReadRowsResponse) GetRowCount() int64 {
	if x == nil {
		return 0
	}
	return x.RowCount
}
// SplitReadStreamRequest is the input message of the
// StorageRead.SplitReadStream RPC.
type SplitReadStreamRequest struct {
// Message state used by the protobuf runtime (see Reset / ProtoReflect).
state protoimpl.MessageState `protogen:"open.v1"`
// Stream id to split (`{session_name}/streams/{id}`).
Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
// Fraction in (0.0, 1.0) of the stream's remaining row range at
// which to split. Values outside that open interval are rejected.
Fraction float64 `protobuf:"fixed64,2,opt,name=fraction,proto3" json:"fraction,omitempty"`
// Runtime bookkeeping fields typed by protoimpl; not part of the
// message's declared proto fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with an empty SplitReadStreamRequest and stores
// message info entry 6 back into its message state.
func (x *SplitReadStreamRequest) Reset() {
	*x = SplitReadStreamRequest{}
	info := &file_storage_read_proto_msgTypes[6]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *SplitReadStreamRequest) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it performs no work.
func (*SplitReadStreamRequest) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a nil receiver the
// view comes from the message info; otherwise the message state is used,
// with its message info stored first if it has not been set yet.
func (x *SplitReadStreamRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[6]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the gzipped raw file descriptor and the index path
// ([6]) of this message within it.
//
// Deprecated: Use SplitReadStreamRequest.ProtoReflect.Descriptor instead.
func (*SplitReadStreamRequest) Descriptor() ([]byte, []int) {
	gz := file_storage_read_proto_rawDescGZIP()
	return gz, []int{6}
}
// GetName returns Name, or "" when the receiver is nil.
func (x *SplitReadStreamRequest) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}
// GetFraction returns Fraction, or 0 when the receiver is nil.
func (x *SplitReadStreamRequest) GetFraction() float64 {
	if x == nil {
		return 0
	}
	return x.Fraction
}
// SplitReadStreamResponse is the output message of the
// StorageRead.SplitReadStream RPC.
type SplitReadStreamResponse struct {
// Message state used by the protobuf runtime (see Reset / ProtoReflect).
state protoimpl.MessageState `protogen:"open.v1"`
// Head portion of the split stream's remaining range.
PrimaryStream *ReadStream `protobuf:"bytes,1,opt,name=primary_stream,json=primaryStream,proto3" json:"primary_stream,omitempty"`
// Tail portion of the split stream's remaining range.
RemainderStream *ReadStream `protobuf:"bytes,2,opt,name=remainder_stream,json=remainderStream,proto3" json:"remainder_stream,omitempty"`
// Runtime bookkeeping fields typed by protoimpl; not part of the
// message's declared proto fields.
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites x with an empty SplitReadStreamResponse and stores
// message info entry 7 back into its message state.
func (x *SplitReadStreamResponse) Reset() {
	*x = SplitReadStreamResponse{}
	info := &file_storage_read_proto_msgTypes[7]
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}
// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *SplitReadStreamResponse) String() string {
	text := protoimpl.X.MessageStringOf(x)
	return text
}
// ProtoMessage is an empty marker method; it performs no work.
func (*SplitReadStreamResponse) ProtoMessage() {}
// ProtoReflect returns the reflective view of x. For a nil receiver the
// view comes from the message info; otherwise the message state is used,
// with its message info stored first if it has not been set yet.
func (x *SplitReadStreamResponse) ProtoReflect() protoreflect.Message {
	info := &file_storage_read_proto_msgTypes[7]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}
// Descriptor returns the gzipped raw file descriptor and the index path
// ([7]) of this message within it.
//
// Deprecated: Use SplitReadStreamResponse.ProtoReflect.Descriptor instead.
func (*SplitReadStreamResponse) Descriptor() ([]byte, []int) {
	gz := file_storage_read_proto_rawDescGZIP()
	return gz, []int{7}
}
// GetPrimaryStream returns PrimaryStream, or nil when the receiver is nil.
func (x *SplitReadStreamResponse) GetPrimaryStream() *ReadStream {
	if x == nil {
		return nil
	}
	return x.PrimaryStream
}
// GetRemainderStream returns RemainderStream, or nil when the receiver is nil.
func (x *SplitReadStreamResponse) GetRemainderStream() *ReadStream {
	if x == nil {
		return nil
	}
	return x.RemainderStream
}
// File_storage_read_proto is the file descriptor for storage_read.proto.
// It is nil until file_storage_read_proto_init assigns it.
var File_storage_read_proto protoreflect.FileDescriptor
// file_storage_read_proto_rawDesc is the raw descriptor data for
// storage_read.proto, passed to protoimpl.DescBuilder as RawDescriptor and
// gzip-compressed on demand by file_storage_read_proto_rawDescGZIP.
// Generated data: do not edit by hand.
const file_storage_read_proto_rawDesc = "" +
"\n" +
"\x12storage_read.proto\x12\x14bigquery_emulator.v1\x1a\x0eemulator.proto\"\xa2\x01\n" +
"\x18CreateReadSessionRequest\x12\x16\n" +
"\x06parent\x18\x01 \x01(\tR\x06parent\x12D\n" +
"\fread_session\x18\x02 \x01(\v2!.bigquery_emulator.v1.ReadSessionR\vreadSession\x12(\n" +
"\x10max_stream_count\x18\x03 \x01(\x05R\x0emaxStreamCount\"\xf4\x01\n" +
"\vReadSession\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n" +
"\x05table\x18\x02 \x01(\tR\x05table\x129\n" +
"\x06schema\x18\x03 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12D\n" +
"\fread_options\x18\x04 \x01(\v2!.bigquery_emulator.v1.ReadOptionsR\vreadOptions\x12:\n" +
"\astreams\x18\x05 \x03(\v2 .bigquery_emulator.v1.ReadStreamR\astreams\"_\n" +
"\vReadOptions\x12'\n" +
"\x0fselected_fields\x18\x01 \x03(\tR\x0eselectedFields\x12'\n" +
"\x0frow_restriction\x18\x02 \x01(\tR\x0erowRestriction\" \n" +
"\n" +
"ReadStream\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\"J\n" +
"\x0fReadRowsRequest\x12\x1f\n" +
"\vread_stream\x18\x01 \x01(\tR\n" +
"readStream\x12\x16\n" +
"\x06offset\x18\x02 \x01(\x03R\x06offset\"b\n" +
"\x10ReadRowsResponse\x121\n" +
"\x04rows\x18\x01 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\x12\x1b\n" +
"\trow_count\x18\x02 \x01(\x03R\browCount\"H\n" +
"\x16SplitReadStreamRequest\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12\x1a\n" +
"\bfraction\x18\x02 \x01(\x01R\bfraction\"\xaf\x01\n" +
"\x17SplitReadStreamResponse\x12G\n" +
"\x0eprimary_stream\x18\x01 \x01(\v2 .bigquery_emulator.v1.ReadStreamR\rprimaryStream\x12K\n" +
"\x10remainder_stream\x18\x02 \x01(\v2 .bigquery_emulator.v1.ReadStreamR\x0fremainderStream2\xc2\x02\n" +
"\vStorageRead\x12f\n" +
"\x11CreateReadSession\x12..bigquery_emulator.v1.CreateReadSessionRequest\x1a!.bigquery_emulator.v1.ReadSession\x12[\n" +
"\bReadRows\x12%.bigquery_emulator.v1.ReadRowsRequest\x1a&.bigquery_emulator.v1.ReadRowsResponse0\x01\x12n\n" +
"\x0fSplitReadStream\x12,.bigquery_emulator.v1.SplitReadStreamRequest\x1a-.bigquery_emulator.v1.SplitReadStreamResponseBFZAgithub.com/vantaboard/bigquery-emulator/gateway/enginepb;enginepb\xf8\x01\x01b\x06proto3"
// Cache for the gzipped descriptor: rawDescOnce guards the one-time
// compression, rawDescData holds its result.
var (
file_storage_read_proto_rawDescOnce sync.Once
file_storage_read_proto_rawDescData []byte
)
func file_storage_read_proto_rawDescGZIP() []byte {
file_storage_read_proto_rawDescOnce.Do(func() {
file_storage_read_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_storage_read_proto_rawDesc), len(file_storage_read_proto_rawDesc)))
})
return file_storage_read_proto_rawDescData
}
// file_storage_read_proto_msgTypes holds one protoimpl.MessageInfo per
// message declared in storage_read.proto (8 in total). The generated
// Reset / ProtoReflect methods index into it by message position.
var file_storage_read_proto_msgTypes = make([]protoimpl.MessageInfo, 8)
// file_storage_read_proto_goTypes lists the Go types referenced by the
// descriptor. Entries 0-7 are this file's messages; 8 and 9 are message
// types under the same proto package that this file references as field
// types. The slice is set to nil once file_storage_read_proto_init has
// built the types.
var file_storage_read_proto_goTypes = []any{
(*CreateReadSessionRequest)(nil), // 0: bigquery_emulator.v1.CreateReadSessionRequest
(*ReadSession)(nil), // 1: bigquery_emulator.v1.ReadSession
(*ReadOptions)(nil), // 2: bigquery_emulator.v1.ReadOptions
(*ReadStream)(nil), // 3: bigquery_emulator.v1.ReadStream
(*ReadRowsRequest)(nil), // 4: bigquery_emulator.v1.ReadRowsRequest
(*ReadRowsResponse)(nil), // 5: bigquery_emulator.v1.ReadRowsResponse
(*SplitReadStreamRequest)(nil), // 6: bigquery_emulator.v1.SplitReadStreamRequest
(*SplitReadStreamResponse)(nil), // 7: bigquery_emulator.v1.SplitReadStreamResponse
(*TableSchema)(nil), // 8: bigquery_emulator.v1.TableSchema
(*DataRow)(nil), // 9: bigquery_emulator.v1.DataRow
}
// file_storage_read_proto_depIdxs maps each type reference in the
// descriptor (field type names, then method input and output types) to an
// index into file_storage_read_proto_goTypes. The last five entries are
// the start offsets of the sub-lists, as labelled on each line. The slice
// is set to nil once file_storage_read_proto_init has built the types.
var file_storage_read_proto_depIdxs = []int32{
1, // 0: bigquery_emulator.v1.CreateReadSessionRequest.read_session:type_name -> bigquery_emulator.v1.ReadSession
8, // 1: bigquery_emulator.v1.ReadSession.schema:type_name -> bigquery_emulator.v1.TableSchema
2, // 2: bigquery_emulator.v1.ReadSession.read_options:type_name -> bigquery_emulator.v1.ReadOptions
3, // 3: bigquery_emulator.v1.ReadSession.streams:type_name -> bigquery_emulator.v1.ReadStream
9, // 4: bigquery_emulator.v1.ReadRowsResponse.rows:type_name -> bigquery_emulator.v1.DataRow
3, // 5: bigquery_emulator.v1.SplitReadStreamResponse.primary_stream:type_name -> bigquery_emulator.v1.ReadStream
3, // 6: bigquery_emulator.v1.SplitReadStreamResponse.remainder_stream:type_name -> bigquery_emulator.v1.ReadStream
0, // 7: bigquery_emulator.v1.StorageRead.CreateReadSession:input_type -> bigquery_emulator.v1.CreateReadSessionRequest
4, // 8: bigquery_emulator.v1.StorageRead.ReadRows:input_type -> bigquery_emulator.v1.ReadRowsRequest
6, // 9: bigquery_emulator.v1.StorageRead.SplitReadStream:input_type -> bigquery_emulator.v1.SplitReadStreamRequest
1, // 10: bigquery_emulator.v1.StorageRead.CreateReadSession:output_type -> bigquery_emulator.v1.ReadSession
5, // 11: bigquery_emulator.v1.StorageRead.ReadRows:output_type -> bigquery_emulator.v1.ReadRowsResponse
7, // 12: bigquery_emulator.v1.StorageRead.SplitReadStream:output_type -> bigquery_emulator.v1.SplitReadStreamResponse
10, // [10:13] is the sub-list for method output_type
7, // [7:10] is the sub-list for method input_type
7, // [7:7] is the sub-list for extension type_name
7, // [7:7] is the sub-list for extension extendee
0, // [0:7] is the sub-list for field type_name
}
// init builds the storage_read.proto descriptor and types at package load.
func init() {
	file_storage_read_proto_init()
}
// file_storage_read_proto_init builds the descriptor and message types for
// storage_read.proto and assigns File_storage_read_proto. It returns
// immediately if the descriptor has already been assigned.
func file_storage_read_proto_init() {
// Already initialized: File_storage_read_proto is only set at the end
// of a successful build below.
if File_storage_read_proto != nil {
return
}
// storage_read.proto imports emulator.proto, so that file's init
// routine is invoked before this one builds its types.
file_emulator_proto_init()
// x is an empty local type used only to obtain this package's import
// path through reflection.
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_storage_read_proto_rawDesc), len(file_storage_read_proto_rawDesc)),
NumEnums: 0,
NumMessages: 8,
NumExtensions: 0,
NumServices: 1,
},
GoTypes: file_storage_read_proto_goTypes,
DependencyIndexes: file_storage_read_proto_depIdxs,
MessageInfos: file_storage_read_proto_msgTypes,
}.Build()
File_storage_read_proto = out.File
// The lookup tables are not read again in this function after Build;
// presumably they are cleared so they can be garbage collected.
file_storage_read_proto_goTypes = nil
file_storage_read_proto_depIdxs = nil
}
// storage_read.proto is the internal contract for the BigQuery Storage
// Read API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.list` reads against large
// tables into Storage Read RPCs against this service so the C++ engine
// can stream rows back without the entire result set living in the
// gateway's heap.
//
// The engine implements `CreateReadSession` and the streaming
// `ReadRows` reply, including per-column projection
// (`selected_fields`) and `row_restriction` pushdown, and the gateway
// is wired to it (with e2e coverage under `gateway/e2e/`). The shape
// here is the **simplified** Storage Read v1 contract: no Arrow/Avro
// projections (rows ride on the same `DataRow` cells that
// `Catalog.ListRows` already returns) and no session liveness
// extension. Those are documented as "future" so the conformance
// harness can pin per-feature gaps. The service does declare a
// `SplitReadStream` RPC alongside `CreateReadSession` and `ReadRows`.
//
// Code generation:
// - Go: `task proto:gen` writes
// `gateway/enginepb/storage_read.{pb,grpc.pb}.go`. Same
// plugin pipeline as `emulator.proto`.
// - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
// `proto/BUILD.bazel` emit
// `storage_read.{pb,grpc.pb}.{h,cc}` into bazel-bin.
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.2
// - protoc (unknown)
// source: storage_read.proto
package enginepb
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
// Full gRPC method names of the StorageRead service, in the form
// "/<package>.<service>/<method>". The client stubs pass them to
// Invoke / NewStream and the unary handlers report them in
// grpc.UnaryServerInfo.FullMethod.
const (
StorageRead_CreateReadSession_FullMethodName = "/bigquery_emulator.v1.StorageRead/CreateReadSession"
StorageRead_ReadRows_FullMethodName = "/bigquery_emulator.v1.StorageRead/ReadRows"
StorageRead_SplitReadStream_FullMethodName = "/bigquery_emulator.v1.StorageRead/SplitReadStream"
)
// StorageReadClient is the client API for StorageRead service.
// Use NewStorageReadClient to obtain an implementation.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type StorageReadClient interface {
// CreateReadSession validates the request, materializes a session
// pinning the table + schema, and returns the session handle the
// caller will pass back to `ReadRows`. The reply carries the table
// schema verbatim so the caller does not need a follow-up
// `Catalog.DescribeTable` round-trip before starting to read.
CreateReadSession(ctx context.Context, in *CreateReadSessionRequest, opts ...grpc.CallOption) (*ReadSession, error)
// ReadRows streams rows off the named stream. The stream id must be
// one of the `ReadSession.streams[*].name` values returned by the
// matching `CreateReadSession` call (or a child stream minted by
// `SplitReadStream`).
// This is a server-streaming RPC: the returned stream yields
// ReadRowsResponse messages.
ReadRows(ctx context.Context, in *ReadRowsRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ReadRowsResponse], error)
// SplitReadStream subdivides an existing stream's remaining row
// range into a primary stream (head) and a residual stream (tail).
SplitReadStream(ctx context.Context, in *SplitReadStreamRequest, opts ...grpc.CallOption) (*SplitReadStreamResponse, error)
}
// storageReadClient is the StorageReadClient implementation returned by
// NewStorageReadClient.
type storageReadClient struct {
// cc is the connection every RPC on this client is issued through.
cc grpc.ClientConnInterface
}
// NewStorageReadClient returns a StorageReadClient that issues its RPCs
// over cc.
func NewStorageReadClient(cc grpc.ClientConnInterface) StorageReadClient {
	client := &storageReadClient{cc: cc}
	return client
}
// CreateReadSession invokes the unary CreateReadSession RPC. The call
// options are grpc.StaticMethod() followed by the caller's opts. It
// returns the decoded ReadSession, or nil and the error on failure.
func (c *storageReadClient) CreateReadSession(ctx context.Context, in *CreateReadSessionRequest, opts ...grpc.CallOption) (*ReadSession, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(ReadSession)
	if err := c.cc.Invoke(ctx, StorageRead_CreateReadSession_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// ReadRows opens the server-streaming ReadRows RPC, sends the single
// request message, closes the send side, and returns the stream for the
// caller to receive ReadRowsResponse messages from. Any error from
// opening, sending, or closing is returned with a nil stream.
func (c *storageReadClient) ReadRows(ctx context.Context, in *ReadRowsRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ReadRowsResponse], error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	// ReadRows is the only entry in StorageRead_ServiceDesc.Streams.
	desc := &StorageRead_ServiceDesc.Streams[0]
	raw, err := c.cc.NewStream(ctx, desc, StorageRead_ReadRows_FullMethodName, callOpts...)
	if err != nil {
		return nil, err
	}
	typed := &grpc.GenericClientStream[ReadRowsRequest, ReadRowsResponse]{ClientStream: raw}
	if sendErr := typed.ClientStream.SendMsg(in); sendErr != nil {
		return nil, sendErr
	}
	if closeErr := typed.ClientStream.CloseSend(); closeErr != nil {
		return nil, closeErr
	}
	return typed, nil
}
// StorageRead_ReadRowsClient is the client-side stream type of ReadRows.
//
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type StorageRead_ReadRowsClient = grpc.ServerStreamingClient[ReadRowsResponse]
// SplitReadStream invokes the unary SplitReadStream RPC. The call options
// are grpc.StaticMethod() followed by the caller's opts. It returns the
// decoded SplitReadStreamResponse, or nil and the error on failure.
func (c *storageReadClient) SplitReadStream(ctx context.Context, in *SplitReadStreamRequest, opts ...grpc.CallOption) (*SplitReadStreamResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	reply := new(SplitReadStreamResponse)
	if err := c.cc.Invoke(ctx, StorageRead_SplitReadStream_FullMethodName, in, reply, callOpts...); err != nil {
		return nil, err
	}
	return reply, nil
}
// StorageReadServer is the server API for StorageRead service.
// All implementations should embed UnimplementedStorageReadServer
// for forward compatibility.
// Implementations are registered with RegisterStorageReadServer.
type StorageReadServer interface {
// CreateReadSession validates the request, materializes a session
// pinning the table + schema, and returns the session handle the
// caller will pass back to `ReadRows`. The reply carries the table
// schema verbatim so the caller does not need a follow-up
// `Catalog.DescribeTable` round-trip before starting to read.
CreateReadSession(context.Context, *CreateReadSessionRequest) (*ReadSession, error)
// ReadRows streams rows off the named stream. The stream id must be
// one of the `ReadSession.streams[*].name` values returned by the
// matching `CreateReadSession` call (or a child stream minted by
// `SplitReadStream`).
// This is a server-streaming RPC: replies are written to the supplied
// stream.
ReadRows(*ReadRowsRequest, grpc.ServerStreamingServer[ReadRowsResponse]) error
// SplitReadStream subdivides an existing stream's remaining row
// range into a primary stream (head) and a residual stream (tail).
SplitReadStream(context.Context, *SplitReadStreamRequest) (*SplitReadStreamResponse, error)
}
// UnimplementedStorageReadServer should be embedded to have
// forward compatible implementations. Each of its RPC methods returns a
// codes.Unimplemented status error.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedStorageReadServer struct{}
// CreateReadSession always fails with a codes.Unimplemented status error.
func (UnimplementedStorageReadServer) CreateReadSession(context.Context, *CreateReadSessionRequest) (*ReadSession, error) {
	err := status.Error(codes.Unimplemented, "method CreateReadSession not implemented")
	return nil, err
}
// ReadRows always fails with a codes.Unimplemented status error.
func (UnimplementedStorageReadServer) ReadRows(*ReadRowsRequest, grpc.ServerStreamingServer[ReadRowsResponse]) error {
	err := status.Error(codes.Unimplemented, "method ReadRows not implemented")
	return err
}
// SplitReadStream always fails with a codes.Unimplemented status error.
func (UnimplementedStorageReadServer) SplitReadStream(context.Context, *SplitReadStreamRequest) (*SplitReadStreamResponse, error) {
	err := status.Error(codes.Unimplemented, "method SplitReadStream not implemented")
	return nil, err
}
// testEmbeddedByValue is an empty method that RegisterStorageReadServer
// calls, when present, to detect a nil embedded pointer at registration.
func (UnimplementedStorageReadServer) testEmbeddedByValue() {}
// UnsafeStorageReadServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to StorageReadServer will
// result in compilation errors.
// Its only method is the unexported mustEmbedUnimplementedStorageReadServer.
type UnsafeStorageReadServer interface {
mustEmbedUnimplementedStorageReadServer()
}
// RegisterStorageReadServer registers srv on s as the implementation of
// the StorageRead service described by StorageRead_ServiceDesc.
func RegisterStorageReadServer(s grpc.ServiceRegistrar, srv StorageReadServer) {
	// A panic in the call below means UnimplementedStorageReadServer was
	// embedded by pointer and is nil. Invoking any unimplemented method
	// would then panic too, so the condition is checked once here, at
	// initialization, rather than surfacing later at runtime due to I/O.
	probe, embedsUnimplemented := srv.(interface{ testEmbeddedByValue() })
	if embedsUnimplemented {
		probe.testEmbeddedByValue()
	}
	s.RegisterService(&StorageRead_ServiceDesc, srv)
}
// _StorageRead_CreateReadSession_Handler is the unary handler for
// CreateReadSession. It decodes the request with dec, then calls the
// server method either directly or through interceptor when one is set.
func _StorageRead_CreateReadSession_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	req := new(CreateReadSessionRequest)
	if err := dec(req); err != nil {
		return nil, err
	}
	if interceptor != nil {
		info := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: StorageRead_CreateReadSession_FullMethodName,
		}
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(StorageReadServer).CreateReadSession(ctx, msg.(*CreateReadSessionRequest))
		}
		return interceptor(ctx, req, info, invoke)
	}
	return srv.(StorageReadServer).CreateReadSession(ctx, req)
}
// _StorageRead_ReadRows_Handler is the stream handler for ReadRows. It
// receives the single request message from stream and passes it, with a
// typed wrapper around stream, to the server's ReadRows method.
func _StorageRead_ReadRows_Handler(srv interface{}, stream grpc.ServerStream) error {
	req := new(ReadRowsRequest)
	if err := stream.RecvMsg(req); err != nil {
		return err
	}
	typed := &grpc.GenericServerStream[ReadRowsRequest, ReadRowsResponse]{ServerStream: stream}
	return srv.(StorageReadServer).ReadRows(req, typed)
}
// StorageRead_ReadRowsServer is the server-side stream type of ReadRows.
//
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type StorageRead_ReadRowsServer = grpc.ServerStreamingServer[ReadRowsResponse]
// _StorageRead_SplitReadStream_Handler is the unary handler for
// SplitReadStream. It decodes the request with dec, then calls the server
// method either directly or through interceptor when one is set.
func _StorageRead_SplitReadStream_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	req := new(SplitReadStreamRequest)
	if err := dec(req); err != nil {
		return nil, err
	}
	if interceptor != nil {
		info := &grpc.UnaryServerInfo{
			Server:     srv,
			FullMethod: StorageRead_SplitReadStream_FullMethodName,
		}
		invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
			return srv.(StorageReadServer).SplitReadStream(ctx, msg.(*SplitReadStreamRequest))
		}
		return interceptor(ctx, req, info, invoke)
	}
	return srv.(StorageReadServer).SplitReadStream(ctx, req)
}
// StorageRead_ServiceDesc is the grpc.ServiceDesc for StorageRead service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
//
// The two unary RPCs are listed under Methods and the server-streaming
// ReadRows under Streams. ReadRows must stay at Streams[0]: the client
// stub's ReadRows method takes the address of that element.
var StorageRead_ServiceDesc = grpc.ServiceDesc{
ServiceName: "bigquery_emulator.v1.StorageRead",
HandlerType: (*StorageReadServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "CreateReadSession",
Handler: _StorageRead_CreateReadSession_Handler,
},
{
MethodName: "SplitReadStream",
Handler: _StorageRead_SplitReadStream_Handler,
},
},
Streams: []grpc.StreamDesc{
{
StreamName: "ReadRows",
Handler: _StorageRead_ReadRows_Handler,
ServerStreams: true,
},
},
Metadata: "storage_read.proto",
}
// storage_write.proto is the internal contract for the BigQuery Storage
// Write API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.insertAll` calls (and the
// gRPC AppendRows path used directly by the Storage Write client
// libraries) into RPCs against this service so the C++ engine can
// commit rows through the same `DuckDBStorage::AppendRows` primitive
// the local DML executor already uses.
//
// Storage Read/Write API handlers for `_default`, `COMMITTED`, `BUFFERED`,
// and `PENDING` stream types ship end-to-end; see `ROADMAP.md` for the
// posture matrix.
//
// Code generation:
// - Go: `task proto:gen` writes
// `gateway/enginepb/storage_write.{pb,grpc.pb}.go`. Same
// plugin pipeline as `emulator.proto`.
// - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
// `proto/BUILD.bazel` emit
// `storage_write.{pb,grpc.pb}.{h,cc}` into bazel-bin.
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.11
// protoc (unknown)
// source: storage_write.proto
package enginepb
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
// Compile-time version checks between this generated file and the
// protoimpl runtime it is built against.
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
// WriteStream_Type is the Go type for the `WriteStream.Type` enum.
//
// BigQuery's stream lifecycle vocabulary.
type WriteStream_Type int32
const (
// Reserved zero-value for proto3. Server treats this as
// COMMITTED (matching BigQuery's documented default).
WriteStream_TYPE_UNSPECIFIED WriteStream_Type = 0
// `_default` and explicit COMMITTED both commit on every
// AppendRows batch (rows immediately visible to readers).
// The emulator supports both.
WriteStream_COMMITTED WriteStream_Type = 1
// PENDING streams buffer rows server-side until
// `BatchCommitWriteStreams` makes them visible. Reserved for
// the deferred follow-up; CreateWriteStream returns
// UNIMPLEMENTED today.
// NOTE(review): the storage_write.proto header comment says PENDING
// and BUFFERED handlers ship end-to-end, which appears to conflict
// with "returns UNIMPLEMENTED today" — confirm which is current.
WriteStream_PENDING WriteStream_Type = 2
// BUFFERED streams buffer rows server-side until `FlushRows`
// advances the visibility offset. Reserved for the deferred
// follow-up; CreateWriteStream returns UNIMPLEMENTED today.
// NOTE(review): same possible conflict with the file header as for
// PENDING above — confirm.
WriteStream_BUFFERED WriteStream_Type = 3
)
// Enum value maps for WriteStream_Type.
// WriteStream_Type_name maps each enum number to its proto name;
// WriteStream_Type_value is the inverse mapping.
var (
WriteStream_Type_name = map[int32]string{
0: "TYPE_UNSPECIFIED",
1: "COMMITTED",
2: "PENDING",
3: "BUFFERED",
}
WriteStream_Type_value = map[string]int32{
"TYPE_UNSPECIFIED": 0,
"COMMITTED": 1,
"PENDING": 2,
"BUFFERED": 3,
}
)
// Enum returns a pointer to a newly allocated copy of x.
func (x WriteStream_Type) Enum() *WriteStream_Type {
	copied := x
	return &copied
}
// String returns the text form produced by protoimpl.X.EnumStringOf for
// x's enum descriptor and number.
func (x WriteStream_Type) String() string {
	desc := x.Descriptor()
	num := protoreflect.EnumNumber(x)
	return protoimpl.X.EnumStringOf(desc, num)
}
// Descriptor returns the enum descriptor held by entry 0 of
// file_storage_write_proto_enumTypes.
func (WriteStream_Type) Descriptor() protoreflect.EnumDescriptor {
	enumInfo := &file_storage_write_proto_enumTypes[0]
	return enumInfo.Descriptor()
}
func (WriteStream_Type) Type() protoreflect.EnumType {
return &file_storage_write_proto_enumTypes[0]
}
func (x WriteStream_Type) Number() protoreflect.EnumNumber {
return protoreflect.EnumNumber(x)
}
// Deprecated: Use WriteStream_Type.Descriptor instead.
func (WriteStream_Type) EnumDescriptor() ([]byte, []int) {
return file_storage_write_proto_rawDescGZIP(), []int{0, 0}
}
// WriteStream describes a write stream by name + type + pinned schema.
// `name` is the server-assigned id of the form
// `{table}/streams/{id}` (or the canonical `_default` reserved name);
// the caller passes it back to `AppendRows`.
type WriteStream struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Server-assigned stream id. Format:
	//
	//	`projects/{p}/datasets/{d}/tables/{t}/streams/{id}`
	//
	// The `_default` stream uses the reserved id `_default`; the
	// engine also accepts that the caller did not call
	// CreateWriteStream first and routes appends to the table's
	// implicit default stream.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	// Lifecycle type of the stream; see WriteStream_Type.
	Type WriteStream_Type `protobuf:"varint,2,opt,name=type,proto3,enum=bigquery_emulator.v1.WriteStream_Type" json:"type,omitempty"`
	// Schema the stream pins at creation time. Engine populates this
	// from the source table's `Storage::GetSchema` reply so the
	// caller can sanity-check writer alignment without a follow-up
	// DescribeTable round-trip.
	Schema *TableSchema `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"`
	// RFC3339 timestamp the stream was minted. The handler stamps
	// this so a follow-up `GetWriteStream` can surface stream
	// age (BigQuery uses it for retention windows).
	CreateTime string `protobuf:"bytes,4,opt,name=create_time,json=createTime,proto3" json:"create_time,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero WriteStream and stores the message info
// for slot 0 of this file's message table in its state.
func (x *WriteStream) Reset() {
	*x = WriteStream{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[0])
}

// String formats x via protoimpl's message stringer.
func (x *WriteStream) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*WriteStream) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *WriteStream) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[0]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use WriteStream.ProtoReflect.Descriptor instead.
func (*WriteStream) Descriptor() ([]byte, []int) {
	path := []int{0}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetName returns x.Name, or "" when x is nil.
func (x *WriteStream) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}

// GetType returns x.Type, or WriteStream_TYPE_UNSPECIFIED when x is nil.
func (x *WriteStream) GetType() WriteStream_Type {
	if x == nil {
		return WriteStream_TYPE_UNSPECIFIED
	}
	return x.Type
}

// GetSchema returns x.Schema, or nil when x is nil.
func (x *WriteStream) GetSchema() *TableSchema {
	if x == nil {
		return nil
	}
	return x.Schema
}

// GetCreateTime returns x.CreateTime, or "" when x is nil.
func (x *WriteStream) GetCreateTime() string {
	if x == nil {
		return ""
	}
	return x.CreateTime
}
// CreateWriteStreamRequest is the input message of the
// StorageWrite.CreateWriteStream RPC.
type CreateWriteStreamRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// BigQuery resource path the stream belongs to:
	// `projects/{p}/datasets/{d}/tables/{t}`.
	Parent string `protobuf:"bytes,1,opt,name=parent,proto3" json:"parent,omitempty"`
	// Stream shape the caller wants. Only `type` is read today;
	// `name` and `schema` are server-populated.
	WriteStream *WriteStream `protobuf:"bytes,2,opt,name=write_stream,json=writeStream,proto3" json:"write_stream,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero CreateWriteStreamRequest and stores the
// message info for slot 1 of this file's message table in its state.
func (x *CreateWriteStreamRequest) Reset() {
	*x = CreateWriteStreamRequest{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[1])
}

// String formats x via protoimpl's message stringer.
func (x *CreateWriteStreamRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*CreateWriteStreamRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *CreateWriteStreamRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[1]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use CreateWriteStreamRequest.ProtoReflect.Descriptor instead.
func (*CreateWriteStreamRequest) Descriptor() ([]byte, []int) {
	path := []int{1}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetParent returns x.Parent, or "" when x is nil.
func (x *CreateWriteStreamRequest) GetParent() string {
	if x == nil {
		return ""
	}
	return x.Parent
}

// GetWriteStream returns x.WriteStream, or nil when x is nil.
func (x *CreateWriteStreamRequest) GetWriteStream() *WriteStream {
	if x == nil {
		return nil
	}
	return x.WriteStream
}
// AppendRowsRequest is the input message of the StorageWrite.AppendRows
// RPC.
type AppendRowsRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// The full stream name (matching `WriteStream.name`). Required on
	// the first message; subsequent messages on the same stream may
	// leave this empty (the handler keeps the first message's
	// binding) or re-assert the same value.
	WriteStream string `protobuf:"bytes,1,opt,name=write_stream,json=writeStream,proto3" json:"write_stream,omitempty"`
	// Optional offset for ordered append. The handler ignores the
	// value for `_default` / `COMMITTED` (every append is immediately
	// committed in arrival order); the field is here so the wire
	// shape is forward-compatible with BUFFERED / PENDING flows.
	Offset int64 `protobuf:"varint,2,opt,name=offset,proto3" json:"offset,omitempty"`
	// Row payload for this append; see AppendRowsRequest_ProtoData.
	ProtoRows *AppendRowsRequest_ProtoData `protobuf:"bytes,4,opt,name=proto_rows,json=protoRows,proto3" json:"proto_rows,omitempty"`
	// Caller-supplied trace id, mirrored back on the response so
	// the producer can correlate appends with replies. Optional.
	TraceId string `protobuf:"bytes,6,opt,name=trace_id,json=traceId,proto3" json:"trace_id,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero AppendRowsRequest and stores the message
// info for slot 2 of this file's message table in its state.
func (x *AppendRowsRequest) Reset() {
	*x = AppendRowsRequest{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[2])
}

// String formats x via protoimpl's message stringer.
func (x *AppendRowsRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*AppendRowsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *AppendRowsRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[2]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use AppendRowsRequest.ProtoReflect.Descriptor instead.
func (*AppendRowsRequest) Descriptor() ([]byte, []int) {
	path := []int{2}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetWriteStream returns x.WriteStream, or "" when x is nil.
func (x *AppendRowsRequest) GetWriteStream() string {
	if x == nil {
		return ""
	}
	return x.WriteStream
}

// GetOffset returns x.Offset, or 0 when x is nil.
func (x *AppendRowsRequest) GetOffset() int64 {
	if x == nil {
		return 0
	}
	return x.Offset
}

// GetProtoRows returns x.ProtoRows, or nil when x is nil.
func (x *AppendRowsRequest) GetProtoRows() *AppendRowsRequest_ProtoData {
	if x == nil {
		return nil
	}
	return x.ProtoRows
}

// GetTraceId returns x.TraceId, or "" when x is nil.
func (x *AppendRowsRequest) GetTraceId() string {
	if x == nil {
		return ""
	}
	return x.TraceId
}
// AppendRowsResponse is the output message of the StorageWrite.AppendRows
// RPC.
type AppendRowsResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Either AppendResult or error is set, never both.
	//
	// Types that are valid to be assigned to Response:
	//
	//	*AppendRowsResponse_AppendResult_
	//	*AppendRowsResponse_ErrorMessage
	Response isAppendRowsResponse_Response `protobuf_oneof:"response"`
	// Trace id echoed from the request (when set). Empty if the
	// request did not pin one.
	TraceId string `protobuf:"bytes,3,opt,name=trace_id,json=traceId,proto3" json:"trace_id,omitempty"`
	// Number of rows the engine committed for this request. The
	// public surface does not advertise this on success; we surface
	// it explicitly so the in-process gRPC test can assert per-batch
	// commit counts without re-querying the table.
	RowCount int64 `protobuf:"varint,4,opt,name=row_count,json=rowCount,proto3" json:"row_count,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero AppendRowsResponse and stores the message
// info for slot 3 of this file's message table in its state.
func (x *AppendRowsResponse) Reset() {
	*x = AppendRowsResponse{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[3])
}

// String formats x via protoimpl's message stringer.
func (x *AppendRowsResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*AppendRowsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *AppendRowsResponse) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[3]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use AppendRowsResponse.ProtoReflect.Descriptor instead.
func (*AppendRowsResponse) Descriptor() ([]byte, []int) {
	path := []int{3}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetResponse returns the raw oneof wrapper stored in x.Response, or nil
// when x is nil.
func (x *AppendRowsResponse) GetResponse() isAppendRowsResponse_Response {
	if x == nil {
		return nil
	}
	return x.Response
}

// GetAppendResult returns the AppendResult payload when the oneof holds an
// *AppendRowsResponse_AppendResult_ wrapper, and nil otherwise (including
// when x is nil).
func (x *AppendRowsResponse) GetAppendResult() *AppendRowsResponse_AppendResult {
	if x == nil {
		return nil
	}
	if wrapper, ok := x.Response.(*AppendRowsResponse_AppendResult_); ok {
		return wrapper.AppendResult
	}
	return nil
}

// GetErrorMessage returns the error text when the oneof holds an
// *AppendRowsResponse_ErrorMessage wrapper, and "" otherwise (including
// when x is nil).
func (x *AppendRowsResponse) GetErrorMessage() string {
	if x == nil {
		return ""
	}
	if wrapper, ok := x.Response.(*AppendRowsResponse_ErrorMessage); ok {
		return wrapper.ErrorMessage
	}
	return ""
}

// GetTraceId returns x.TraceId, or "" when x is nil.
func (x *AppendRowsResponse) GetTraceId() string {
	if x == nil {
		return ""
	}
	return x.TraceId
}

// GetRowCount returns x.RowCount, or 0 when x is nil.
func (x *AppendRowsResponse) GetRowCount() int64 {
	if x == nil {
		return 0
	}
	return x.RowCount
}
// isAppendRowsResponse_Response is the interface type of the
// AppendRowsResponse.Response oneof field. Its single unexported marker
// method is implemented by the two wrapper types declared below.
type isAppendRowsResponse_Response interface {
isAppendRowsResponse_Response()
}
// AppendRowsResponse_AppendResult_ is the oneof wrapper carrying the
// `append_result` case (field number 1).
type AppendRowsResponse_AppendResult_ struct {
AppendResult *AppendRowsResponse_AppendResult `protobuf:"bytes,1,opt,name=append_result,json=appendResult,proto3,oneof"`
}
// AppendRowsResponse_ErrorMessage is the oneof wrapper carrying the
// `error_message` case (field number 2).
type AppendRowsResponse_ErrorMessage struct {
// Error message from the storage layer (`DuckDBStorage::AppendRows`
// failures, schema mismatches, ...). The handler maps absl
// statuses onto a free-form message here; the public BigQuery
// surface uses `google.rpc.Status` but we keep the simpler
// shape so we don't pull `google.rpc.status` in.
ErrorMessage string `protobuf:"bytes,2,opt,name=error_message,json=errorMessage,proto3,oneof"`
}
// Marker methods: they do nothing at runtime and exist only so both
// wrapper types satisfy isAppendRowsResponse_Response.
func (*AppendRowsResponse_AppendResult_) isAppendRowsResponse_Response() {}
func (*AppendRowsResponse_ErrorMessage) isAppendRowsResponse_Response() {}
// GetWriteStreamRequest is the input message of the
// StorageWrite.GetWriteStream RPC.
type GetWriteStreamRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Name of the stream to look up.
	// NOTE(review): presumably the full `WriteStream.name`; confirm against the handler.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero GetWriteStreamRequest and stores the
// message info for slot 4 of this file's message table in its state.
func (x *GetWriteStreamRequest) Reset() {
	*x = GetWriteStreamRequest{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[4])
}

// String formats x via protoimpl's message stringer.
func (x *GetWriteStreamRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*GetWriteStreamRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *GetWriteStreamRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[4]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use GetWriteStreamRequest.ProtoReflect.Descriptor instead.
func (*GetWriteStreamRequest) Descriptor() ([]byte, []int) {
	path := []int{4}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetName returns x.Name, or "" when x is nil.
func (x *GetWriteStreamRequest) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}
// FinalizeWriteStreamRequest is the input message of the
// StorageWrite.FinalizeWriteStream RPC.
type FinalizeWriteStreamRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Name of the stream to finalize.
	// NOTE(review): presumably the full `WriteStream.name`; confirm against the handler.
	Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero FinalizeWriteStreamRequest and stores the
// message info for slot 5 of this file's message table in its state.
func (x *FinalizeWriteStreamRequest) Reset() {
	*x = FinalizeWriteStreamRequest{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[5])
}

// String formats x via protoimpl's message stringer.
func (x *FinalizeWriteStreamRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*FinalizeWriteStreamRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *FinalizeWriteStreamRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[5]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use FinalizeWriteStreamRequest.ProtoReflect.Descriptor instead.
func (*FinalizeWriteStreamRequest) Descriptor() ([]byte, []int) {
	path := []int{5}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetName returns x.Name, or "" when x is nil.
func (x *FinalizeWriteStreamRequest) GetName() string {
	if x == nil {
		return ""
	}
	return x.Name
}
// FinalizeWriteStreamResponse is the output message of the
// StorageWrite.FinalizeWriteStream RPC.
type FinalizeWriteStreamResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Total rows committed on the stream. Reserved for the deferred
	// follow-up; the emulator returns UNIMPLEMENTED.
	RowCount int64 `protobuf:"varint,1,opt,name=row_count,json=rowCount,proto3" json:"row_count,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero FinalizeWriteStreamResponse and stores the
// message info for slot 6 of this file's message table in its state.
func (x *FinalizeWriteStreamResponse) Reset() {
	*x = FinalizeWriteStreamResponse{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[6])
}

// String formats x via protoimpl's message stringer.
func (x *FinalizeWriteStreamResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*FinalizeWriteStreamResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *FinalizeWriteStreamResponse) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[6]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use FinalizeWriteStreamResponse.ProtoReflect.Descriptor instead.
func (*FinalizeWriteStreamResponse) Descriptor() ([]byte, []int) {
	path := []int{6}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetRowCount returns x.RowCount, or 0 when x is nil.
func (x *FinalizeWriteStreamResponse) GetRowCount() int64 {
	if x == nil {
		return 0
	}
	return x.RowCount
}
// BatchCommitWriteStreamsRequest is the input message of the
// StorageWrite.BatchCommitWriteStreams RPC.
type BatchCommitWriteStreamsRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Parent table that owns the streams.
	Parent string `protobuf:"bytes,1,opt,name=parent,proto3" json:"parent,omitempty"`
	// Stream names to commit atomically. Reserved for the deferred
	// follow-up; the emulator returns UNIMPLEMENTED.
	WriteStreams []string `protobuf:"bytes,2,rep,name=write_streams,json=writeStreams,proto3" json:"write_streams,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero BatchCommitWriteStreamsRequest and stores
// the message info for slot 7 of this file's message table in its state.
func (x *BatchCommitWriteStreamsRequest) Reset() {
	*x = BatchCommitWriteStreamsRequest{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[7])
}

// String formats x via protoimpl's message stringer.
func (x *BatchCommitWriteStreamsRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*BatchCommitWriteStreamsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *BatchCommitWriteStreamsRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[7]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use BatchCommitWriteStreamsRequest.ProtoReflect.Descriptor instead.
func (*BatchCommitWriteStreamsRequest) Descriptor() ([]byte, []int) {
	path := []int{7}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetParent returns x.Parent, or "" when x is nil.
func (x *BatchCommitWriteStreamsRequest) GetParent() string {
	if x == nil {
		return ""
	}
	return x.Parent
}

// GetWriteStreams returns x.WriteStreams, or nil when x is nil.
func (x *BatchCommitWriteStreamsRequest) GetWriteStreams() []string {
	if x == nil {
		return nil
	}
	return x.WriteStreams
}
// BatchCommitWriteStreamsResponse is the output message of the
// StorageWrite.BatchCommitWriteStreams RPC.
type BatchCommitWriteStreamsResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// RFC3339 timestamp the commit landed. Reserved for the deferred
	// follow-up.
	CommitTime string `protobuf:"bytes,1,opt,name=commit_time,json=commitTime,proto3" json:"commit_time,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero BatchCommitWriteStreamsResponse and stores
// the message info for slot 8 of this file's message table in its state.
func (x *BatchCommitWriteStreamsResponse) Reset() {
	*x = BatchCommitWriteStreamsResponse{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[8])
}

// String formats x via protoimpl's message stringer.
func (x *BatchCommitWriteStreamsResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*BatchCommitWriteStreamsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *BatchCommitWriteStreamsResponse) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[8]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use BatchCommitWriteStreamsResponse.ProtoReflect.Descriptor instead.
func (*BatchCommitWriteStreamsResponse) Descriptor() ([]byte, []int) {
	path := []int{8}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetCommitTime returns x.CommitTime, or "" when x is nil.
func (x *BatchCommitWriteStreamsResponse) GetCommitTime() string {
	if x == nil {
		return ""
	}
	return x.CommitTime
}
// FlushRowsRequest is the input message of the StorageWrite.FlushRows RPC.
type FlushRowsRequest struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Stream the flush applies to.
	// NOTE(review): presumably the full `WriteStream.name`; confirm against the handler.
	WriteStream string `protobuf:"bytes,1,opt,name=write_stream,json=writeStream,proto3" json:"write_stream,omitempty"`
	// Offset carried by the flush request.
	// NOTE(review): exact semantics are not visible in this file; confirm against the proto.
	Offset int64 `protobuf:"varint,2,opt,name=offset,proto3" json:"offset,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero FlushRowsRequest and stores the message
// info for slot 9 of this file's message table in its state.
func (x *FlushRowsRequest) Reset() {
	*x = FlushRowsRequest{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[9])
}

// String formats x via protoimpl's message stringer.
func (x *FlushRowsRequest) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*FlushRowsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *FlushRowsRequest) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[9]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use FlushRowsRequest.ProtoReflect.Descriptor instead.
func (*FlushRowsRequest) Descriptor() ([]byte, []int) {
	path := []int{9}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetWriteStream returns x.WriteStream, or "" when x is nil.
func (x *FlushRowsRequest) GetWriteStream() string {
	if x == nil {
		return ""
	}
	return x.WriteStream
}

// GetOffset returns x.Offset, or 0 when x is nil.
func (x *FlushRowsRequest) GetOffset() int64 {
	if x == nil {
		return 0
	}
	return x.Offset
}
// FlushRowsResponse is the output message of the StorageWrite.FlushRows RPC.
type FlushRowsResponse struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Offset reported back by the flush.
	// NOTE(review): exact semantics are not visible in this file; confirm against the proto.
	Offset int64 `protobuf:"varint,1,opt,name=offset,proto3" json:"offset,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero FlushRowsResponse and stores the message
// info for slot 10 of this file's message table in its state.
func (x *FlushRowsResponse) Reset() {
	*x = FlushRowsResponse{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[10])
}

// String formats x via protoimpl's message stringer.
func (x *FlushRowsResponse) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*FlushRowsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *FlushRowsResponse) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[10]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use FlushRowsResponse.ProtoReflect.Descriptor instead.
func (*FlushRowsResponse) Descriptor() ([]byte, []int) {
	path := []int{10}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetOffset returns x.Offset, or 0 when x is nil.
func (x *FlushRowsResponse) GetOffset() int64 {
	if x == nil {
		return 0
	}
	return x.Offset
}
// ProtoData carries the rows + the writer's schema. The writer
// schema is informational today (we trust the table's schema for
// shape validation); the rows ride on `DataRow` cells, the same
// shape `Catalog.InsertRows` uses.
type AppendRowsRequest_ProtoData struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Schema as declared by the writer.
	WriterSchema *TableSchema `protobuf:"bytes,1,opt,name=writer_schema,json=writerSchema,proto3" json:"writer_schema,omitempty"`
	// Rows to append.
	Rows []*DataRow `protobuf:"bytes,2,rep,name=rows,proto3" json:"rows,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero AppendRowsRequest_ProtoData and stores the
// message info for slot 11 of this file's message table in its state.
func (x *AppendRowsRequest_ProtoData) Reset() {
	*x = AppendRowsRequest_ProtoData{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[11])
}

// String formats x via protoimpl's message stringer.
func (x *AppendRowsRequest_ProtoData) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*AppendRowsRequest_ProtoData) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *AppendRowsRequest_ProtoData) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[11]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use AppendRowsRequest_ProtoData.ProtoReflect.Descriptor instead.
func (*AppendRowsRequest_ProtoData) Descriptor() ([]byte, []int) {
	path := []int{2, 0}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetWriterSchema returns x.WriterSchema, or nil when x is nil.
func (x *AppendRowsRequest_ProtoData) GetWriterSchema() *TableSchema {
	if x == nil {
		return nil
	}
	return x.WriterSchema
}

// GetRows returns x.Rows, or nil when x is nil.
func (x *AppendRowsRequest_ProtoData) GetRows() []*DataRow {
	if x == nil {
		return nil
	}
	return x.Rows
}
// AppendResult is the success envelope; carries the offset of
// the first row in the batch on a `_default` / `COMMITTED`
// stream that's just `prior_offset + 0` (every append commits
// in arrival order).
type AppendRowsResponse_AppendResult struct {
	state protoimpl.MessageState `protogen:"open.v1"`
	// Offset reported for the append (see the type comment).
	Offset int64 `protobuf:"varint,1,opt,name=offset,proto3" json:"offset,omitempty"`
	unknownFields protoimpl.UnknownFields
	sizeCache protoimpl.SizeCache
}

// Reset overwrites x with a zero AppendRowsResponse_AppendResult and stores
// the message info for slot 12 of this file's message table in its state.
func (x *AppendRowsResponse_AppendResult) Reset() {
	*x = AppendRowsResponse_AppendResult{}
	protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[12])
}

// String formats x via protoimpl's message stringer.
func (x *AppendRowsResponse_AppendResult) String() string {
	return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is a no-op marker method.
func (*AppendRowsResponse_AppendResult) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil x the
// message info is stored into the state on first use; a nil x is
// wrapped through the message info directly.
func (x *AppendRowsResponse_AppendResult) ProtoReflect() protoreflect.Message {
	info := &file_storage_write_proto_msgTypes[12]
	if x == nil {
		return info.MessageOf(x)
	}
	state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	if state.LoadMessageInfo() == nil {
		state.StoreMessageInfo(info)
	}
	return state
}

// Deprecated: Use AppendRowsResponse_AppendResult.ProtoReflect.Descriptor instead.
func (*AppendRowsResponse_AppendResult) Descriptor() ([]byte, []int) {
	path := []int{3, 0}
	return file_storage_write_proto_rawDescGZIP(), path
}

// GetOffset returns x.Offset, or 0 when x is nil.
func (x *AppendRowsResponse_AppendResult) GetOffset() int64 {
	if x == nil {
		return 0
	}
	return x.Offset
}
// File_storage_write_proto is the file descriptor for storage_write.proto.
// It is nil until file_storage_write_proto_init assigns the built
// descriptor to it; that function also uses the non-nil value as its
// "already initialized" guard.
var File_storage_write_proto protoreflect.FileDescriptor
// file_storage_write_proto_rawDesc is the raw descriptor for
// storage_write.proto, stored as a string constant. It is passed (as a
// byte slice view) to protoimpl.DescBuilder.RawDescriptor and to
// protoimpl.X.CompressGZIP. The bytes must not be edited by hand.
const file_storage_write_proto_rawDesc = "" +
"\n" +
"\x13storage_write.proto\x12\x14bigquery_emulator.v1\x1a\x0eemulator.proto\"\x81\x02\n" +
"\vWriteStream\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\x12:\n" +
"\x04type\x18\x02 \x01(\x0e2&.bigquery_emulator.v1.WriteStream.TypeR\x04type\x129\n" +
"\x06schema\x18\x03 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12\x1f\n" +
"\vcreate_time\x18\x04 \x01(\tR\n" +
"createTime\"F\n" +
"\x04Type\x12\x14\n" +
"\x10TYPE_UNSPECIFIED\x10\x00\x12\r\n" +
"\tCOMMITTED\x10\x01\x12\v\n" +
"\aPENDING\x10\x02\x12\f\n" +
"\bBUFFERED\x10\x03\"x\n" +
"\x18CreateWriteStreamRequest\x12\x16\n" +
"\x06parent\x18\x01 \x01(\tR\x06parent\x12D\n" +
"\fwrite_stream\x18\x02 \x01(\v2!.bigquery_emulator.v1.WriteStreamR\vwriteStream\"\xc4\x02\n" +
"\x11AppendRowsRequest\x12!\n" +
"\fwrite_stream\x18\x01 \x01(\tR\vwriteStream\x12\x16\n" +
"\x06offset\x18\x02 \x01(\x03R\x06offset\x12P\n" +
"\n" +
"proto_rows\x18\x04 \x01(\v21.bigquery_emulator.v1.AppendRowsRequest.ProtoDataR\tprotoRows\x12\x19\n" +
"\btrace_id\x18\x06 \x01(\tR\atraceId\x1a\x86\x01\n" +
"\tProtoData\x12F\n" +
"\rwriter_schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\fwriterSchema\x121\n" +
"\x04rows\x18\x02 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\"\x85\x02\n" +
"\x12AppendRowsResponse\x12\\\n" +
"\rappend_result\x18\x01 \x01(\v25.bigquery_emulator.v1.AppendRowsResponse.AppendResultH\x00R\fappendResult\x12%\n" +
"\rerror_message\x18\x02 \x01(\tH\x00R\ferrorMessage\x12\x19\n" +
"\btrace_id\x18\x03 \x01(\tR\atraceId\x12\x1b\n" +
"\trow_count\x18\x04 \x01(\x03R\browCount\x1a&\n" +
"\fAppendResult\x12\x16\n" +
"\x06offset\x18\x01 \x01(\x03R\x06offsetB\n" +
"\n" +
"\bresponse\"+\n" +
"\x15GetWriteStreamRequest\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\"0\n" +
"\x1aFinalizeWriteStreamRequest\x12\x12\n" +
"\x04name\x18\x01 \x01(\tR\x04name\":\n" +
"\x1bFinalizeWriteStreamResponse\x12\x1b\n" +
"\trow_count\x18\x01 \x01(\x03R\browCount\"]\n" +
"\x1eBatchCommitWriteStreamsRequest\x12\x16\n" +
"\x06parent\x18\x01 \x01(\tR\x06parent\x12#\n" +
"\rwrite_streams\x18\x02 \x03(\tR\fwriteStreams\"B\n" +
"\x1fBatchCommitWriteStreamsResponse\x12\x1f\n" +
"\vcommit_time\x18\x01 \x01(\tR\n" +
"commitTime\"M\n" +
"\x10FlushRowsRequest\x12!\n" +
"\fwrite_stream\x18\x01 \x01(\tR\vwriteStream\x12\x16\n" +
"\x06offset\x18\x02 \x01(\x03R\x06offset\"+\n" +
"\x11FlushRowsResponse\x12\x16\n" +
"\x06offset\x18\x01 \x01(\x03R\x06offset2\xa0\x05\n" +
"\fStorageWrite\x12f\n" +
"\x11CreateWriteStream\x12..bigquery_emulator.v1.CreateWriteStreamRequest\x1a!.bigquery_emulator.v1.WriteStream\x12c\n" +
"\n" +
"AppendRows\x12'.bigquery_emulator.v1.AppendRowsRequest\x1a(.bigquery_emulator.v1.AppendRowsResponse(\x010\x01\x12`\n" +
"\x0eGetWriteStream\x12+.bigquery_emulator.v1.GetWriteStreamRequest\x1a!.bigquery_emulator.v1.WriteStream\x12z\n" +
"\x13FinalizeWriteStream\x120.bigquery_emulator.v1.FinalizeWriteStreamRequest\x1a1.bigquery_emulator.v1.FinalizeWriteStreamResponse\x12\x86\x01\n" +
"\x17BatchCommitWriteStreams\x124.bigquery_emulator.v1.BatchCommitWriteStreamsRequest\x1a5.bigquery_emulator.v1.BatchCommitWriteStreamsResponse\x12\\\n" +
"\tFlushRows\x12&.bigquery_emulator.v1.FlushRowsRequest\x1a'.bigquery_emulator.v1.FlushRowsResponseBFZAgithub.com/vantaboard/bigquery-emulator/gateway/enginepb;enginepb\xf8\x01\x01b\x06proto3"
var (
file_storage_write_proto_rawDescOnce sync.Once
file_storage_write_proto_rawDescData []byte
)
func file_storage_write_proto_rawDescGZIP() []byte {
file_storage_write_proto_rawDescOnce.Do(func() {
file_storage_write_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_storage_write_proto_rawDesc), len(file_storage_write_proto_rawDesc)))
})
return file_storage_write_proto_rawDescData
}
// Type tables handed to protoimpl.TypeBuilder by
// file_storage_write_proto_init. The slice lengths match the NumEnums (1)
// and NumMessages (13) values passed to the builder.
var file_storage_write_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
var file_storage_write_proto_msgTypes = make([]protoimpl.MessageInfo, 13)
// file_storage_write_proto_goTypes lists the Go types referenced by this
// file's descriptor; the trailing comment on each entry gives its index
// and full proto name. Entries 14 and 15 are types declared outside this
// file's messages (TableSchema, DataRow).
var file_storage_write_proto_goTypes = []any{
(WriteStream_Type)(0), // 0: bigquery_emulator.v1.WriteStream.Type
(*WriteStream)(nil), // 1: bigquery_emulator.v1.WriteStream
(*CreateWriteStreamRequest)(nil), // 2: bigquery_emulator.v1.CreateWriteStreamRequest
(*AppendRowsRequest)(nil), // 3: bigquery_emulator.v1.AppendRowsRequest
(*AppendRowsResponse)(nil), // 4: bigquery_emulator.v1.AppendRowsResponse
(*GetWriteStreamRequest)(nil), // 5: bigquery_emulator.v1.GetWriteStreamRequest
(*FinalizeWriteStreamRequest)(nil), // 6: bigquery_emulator.v1.FinalizeWriteStreamRequest
(*FinalizeWriteStreamResponse)(nil), // 7: bigquery_emulator.v1.FinalizeWriteStreamResponse
(*BatchCommitWriteStreamsRequest)(nil), // 8: bigquery_emulator.v1.BatchCommitWriteStreamsRequest
(*BatchCommitWriteStreamsResponse)(nil), // 9: bigquery_emulator.v1.BatchCommitWriteStreamsResponse
(*FlushRowsRequest)(nil), // 10: bigquery_emulator.v1.FlushRowsRequest
(*FlushRowsResponse)(nil), // 11: bigquery_emulator.v1.FlushRowsResponse
(*AppendRowsRequest_ProtoData)(nil), // 12: bigquery_emulator.v1.AppendRowsRequest.ProtoData
(*AppendRowsResponse_AppendResult)(nil), // 13: bigquery_emulator.v1.AppendRowsResponse.AppendResult
(*TableSchema)(nil), // 14: bigquery_emulator.v1.TableSchema
(*DataRow)(nil), // 15: bigquery_emulator.v1.DataRow
}
// file_storage_write_proto_depIdxs holds indexes into
// file_storage_write_proto_goTypes. Each of the first 19 entries resolves
// one field type, method input, or method output, as named in its trailing
// comment; the final five entries give the start offsets of the sub-lists.
var file_storage_write_proto_depIdxs = []int32{
0, // 0: bigquery_emulator.v1.WriteStream.type:type_name -> bigquery_emulator.v1.WriteStream.Type
14, // 1: bigquery_emulator.v1.WriteStream.schema:type_name -> bigquery_emulator.v1.TableSchema
1, // 2: bigquery_emulator.v1.CreateWriteStreamRequest.write_stream:type_name -> bigquery_emulator.v1.WriteStream
12, // 3: bigquery_emulator.v1.AppendRowsRequest.proto_rows:type_name -> bigquery_emulator.v1.AppendRowsRequest.ProtoData
13, // 4: bigquery_emulator.v1.AppendRowsResponse.append_result:type_name -> bigquery_emulator.v1.AppendRowsResponse.AppendResult
14, // 5: bigquery_emulator.v1.AppendRowsRequest.ProtoData.writer_schema:type_name -> bigquery_emulator.v1.TableSchema
15, // 6: bigquery_emulator.v1.AppendRowsRequest.ProtoData.rows:type_name -> bigquery_emulator.v1.DataRow
2, // 7: bigquery_emulator.v1.StorageWrite.CreateWriteStream:input_type -> bigquery_emulator.v1.CreateWriteStreamRequest
3, // 8: bigquery_emulator.v1.StorageWrite.AppendRows:input_type -> bigquery_emulator.v1.AppendRowsRequest
5, // 9: bigquery_emulator.v1.StorageWrite.GetWriteStream:input_type -> bigquery_emulator.v1.GetWriteStreamRequest
6, // 10: bigquery_emulator.v1.StorageWrite.FinalizeWriteStream:input_type -> bigquery_emulator.v1.FinalizeWriteStreamRequest
8, // 11: bigquery_emulator.v1.StorageWrite.BatchCommitWriteStreams:input_type -> bigquery_emulator.v1.BatchCommitWriteStreamsRequest
10, // 12: bigquery_emulator.v1.StorageWrite.FlushRows:input_type -> bigquery_emulator.v1.FlushRowsRequest
1, // 13: bigquery_emulator.v1.StorageWrite.CreateWriteStream:output_type -> bigquery_emulator.v1.WriteStream
4, // 14: bigquery_emulator.v1.StorageWrite.AppendRows:output_type -> bigquery_emulator.v1.AppendRowsResponse
1, // 15: bigquery_emulator.v1.StorageWrite.GetWriteStream:output_type -> bigquery_emulator.v1.WriteStream
7, // 16: bigquery_emulator.v1.StorageWrite.FinalizeWriteStream:output_type -> bigquery_emulator.v1.FinalizeWriteStreamResponse
9, // 17: bigquery_emulator.v1.StorageWrite.BatchCommitWriteStreams:output_type -> bigquery_emulator.v1.BatchCommitWriteStreamsResponse
11, // 18: bigquery_emulator.v1.StorageWrite.FlushRows:output_type -> bigquery_emulator.v1.FlushRowsResponse
13, // [13:19] is the sub-list for method output_type
7, // [7:13] is the sub-list for method input_type
7, // [7:7] is the sub-list for extension type_name
7, // [7:7] is the sub-list for extension extendee
0, // [0:7] is the sub-list for field type_name
}
// init builds this file's descriptor and type information at package load.
func init() { file_storage_write_proto_init() }

// file_storage_write_proto_init builds the type information for
// storage_write.proto with protoimpl.TypeBuilder and stores the resulting
// file descriptor in File_storage_write_proto. It returns immediately when
// File_storage_write_proto is already set, so repeated calls are no-ops.
func file_storage_write_proto_init() {
	if File_storage_write_proto != nil {
		return
	}
	// Initialize emulator.proto first — presumably because TableSchema and
	// DataRow, referenced in the type table, are declared there.
	file_emulator_proto_init()

	// AppendRowsResponse (message slot 3) is the only message with a oneof.
	file_storage_write_proto_msgTypes[3].OneofWrappers = []any{
		(*AppendRowsResponse_AppendResult_)(nil),
		(*AppendRowsResponse_ErrorMessage)(nil),
	}

	// A locally declared type gives reflect a handle on this package's path.
	type x struct{}
	rawDesc := unsafe.Slice(unsafe.StringData(file_storage_write_proto_rawDesc), len(file_storage_write_proto_rawDesc))
	builder := protoimpl.TypeBuilder{
		File: protoimpl.DescBuilder{
			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
			RawDescriptor: rawDesc,
			NumEnums:      1,
			NumMessages:   13,
			NumExtensions: 0,
			NumServices:   1,
		},
		GoTypes:           file_storage_write_proto_goTypes,
		DependencyIndexes: file_storage_write_proto_depIdxs,
		EnumInfos:         file_storage_write_proto_enumTypes,
		MessageInfos:      file_storage_write_proto_msgTypes,
	}
	out := builder.Build()
	File_storage_write_proto = out.File

	// The lookup tables are not read again in this file after the build;
	// drop the references (presumably so they can be collected).
	file_storage_write_proto_goTypes = nil
	file_storage_write_proto_depIdxs = nil
}
// storage_write.proto is the internal contract for the BigQuery Storage
// Write API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.insertAll` calls (and the
// gRPC AppendRows path used directly by the Storage Write client
// libraries) into RPCs against this service so the C++ engine can
// commit rows through the same `DuckDBStorage::AppendRows` primitive
// the local DML executor already uses.
//
// Storage Read/Write API handlers for `_default`, `COMMITTED`, `BUFFERED`,
// and `PENDING` stream types ship end-to-end; see `ROADMAP.md` for the
// posture matrix.
//
// Code generation:
// - Go: `task proto:gen` writes
// `gateway/enginepb/storage_write.{pb,grpc.pb}.go`. Same
// plugin pipeline as `emulator.proto`.
// - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
// `proto/BUILD.bazel` emit
// `storage_write.{pb,grpc.pb}.{h,cc}` into bazel-bin.
// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.2
// - protoc (unknown)
// source: storage_write.proto
package enginepb
import (
context "context"
grpc "google.golang.org/grpc"
codes "google.golang.org/grpc/codes"
status "google.golang.org/grpc/status"
)
// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against:
// the build fails if grpc does not define SupportPackageIsVersion9.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9
// Full gRPC method names ("/<service>/<method>") for the StorageWrite
// service. The client stubs pass them to Invoke / NewStream and the unary
// server handlers report them in grpc.UnaryServerInfo.FullMethod.
const (
StorageWrite_CreateWriteStream_FullMethodName = "/bigquery_emulator.v1.StorageWrite/CreateWriteStream"
StorageWrite_AppendRows_FullMethodName = "/bigquery_emulator.v1.StorageWrite/AppendRows"
StorageWrite_GetWriteStream_FullMethodName = "/bigquery_emulator.v1.StorageWrite/GetWriteStream"
StorageWrite_FinalizeWriteStream_FullMethodName = "/bigquery_emulator.v1.StorageWrite/FinalizeWriteStream"
StorageWrite_BatchCommitWriteStreams_FullMethodName = "/bigquery_emulator.v1.StorageWrite/BatchCommitWriteStreams"
StorageWrite_FlushRows_FullMethodName = "/bigquery_emulator.v1.StorageWrite/FlushRows"
)
// StorageWriteClient is the client API for StorageWrite service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// NOTE(review): several method comments below say BUFFERED / PENDING streams
// and the Finalize / BatchCommit / Flush RPCs return UNIMPLEMENTED, while the
// storage_write.proto header comment in this file says those stream types
// ship end-to-end. Confirm which is current; the fix belongs in the .proto
// comments because this file is generated.
type StorageWriteClient interface {
// CreateWriteStream returns a stream handle the caller binds to
// `AppendRows`. The emulator supports `_default` and `COMMITTED`
// streams (every flushed batch becomes immediately visible to
// readers); `BUFFERED` / `PENDING` requests fail with UNIMPLEMENTED
// until the deferred follow-up lands.
CreateWriteStream(ctx context.Context, in *CreateWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error)
// AppendRows is bidirectional-streaming. The first message on a
// stream MUST set `write_stream`; subsequent messages may leave it
// empty (the handler keeps the binding from the first message) or
// re-assert the same name. The handler commits each `proto_rows`
// batch through `Storage::AppendRows` synchronously and replies with
// one `AppendRowsResponse` per request before reading the next.
AppendRows(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[AppendRowsRequest, AppendRowsResponse], error)
// GetWriteStream returns the stream metadata the engine recorded at
// CreateWriteStream time. Used by clients to verify the stream
// type / schema before opening an AppendRows session.
GetWriteStream(ctx context.Context, in *GetWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error)
// FinalizeWriteStream marks a stream as closed. The emulator
// returns UNIMPLEMENTED today; the surface is reserved for the
// deferred BUFFERED / PENDING follow-up where finalize is the
// producer's signal to the BatchCommitWriteStreams pass.
FinalizeWriteStream(ctx context.Context, in *FinalizeWriteStreamRequest, opts ...grpc.CallOption) (*FinalizeWriteStreamResponse, error)
// BatchCommitWriteStreams atomically commits a set of `PENDING`
// streams. The emulator returns UNIMPLEMENTED today; reserved for
// the deferred follow-up.
BatchCommitWriteStreams(ctx context.Context, in *BatchCommitWriteStreamsRequest, opts ...grpc.CallOption) (*BatchCommitWriteStreamsResponse, error)
// FlushRows advances the visibility offset on a `BUFFERED` stream.
// The emulator returns UNIMPLEMENTED today; reserved for the
// deferred follow-up.
FlushRows(ctx context.Context, in *FlushRowsRequest, opts ...grpc.CallOption) (*FlushRowsResponse, error)
}
// storageWriteClient is the StorageWriteClient implementation returned by
// NewStorageWriteClient; every RPC is issued through cc.
type storageWriteClient struct {
cc grpc.ClientConnInterface
}
// NewStorageWriteClient returns a StorageWriteClient that issues its RPCs
// over cc.
func NewStorageWriteClient(cc grpc.ClientConnInterface) StorageWriteClient {
	client := &storageWriteClient{cc: cc}
	return client
}
// CreateWriteStream invokes the unary CreateWriteStream RPC. grpc.StaticMethod
// is placed ahead of the caller-supplied options.
func (c *storageWriteClient) CreateWriteStream(ctx context.Context, in *CreateWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := new(WriteStream)
	if err := c.cc.Invoke(ctx, StorageWrite_CreateWriteStream_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// AppendRows opens the bidirectional AppendRows stream and wraps it in the
// generic typed client stream. grpc.StaticMethod is placed ahead of the
// caller-supplied options.
func (c *storageWriteClient) AppendRows(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[AppendRowsRequest, AppendRowsResponse], error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	// AppendRows is the first (and only) entry in the service's Streams list.
	desc := &StorageWrite_ServiceDesc.Streams[0]
	raw, err := c.cc.NewStream(ctx, desc, StorageWrite_AppendRows_FullMethodName, callOpts...)
	if err != nil {
		return nil, err
	}
	return &grpc.GenericClientStream[AppendRowsRequest, AppendRowsResponse]{ClientStream: raw}, nil
}
// StorageWrite_AppendRowsClient is a type alias provided for backwards
// compatibility with existing code that references the prior non-generic
// stream type by name.
type StorageWrite_AppendRowsClient = grpc.BidiStreamingClient[AppendRowsRequest, AppendRowsResponse]
// GetWriteStream invokes the unary GetWriteStream RPC. grpc.StaticMethod is
// placed ahead of the caller-supplied options.
func (c *storageWriteClient) GetWriteStream(ctx context.Context, in *GetWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := new(WriteStream)
	if err := c.cc.Invoke(ctx, StorageWrite_GetWriteStream_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// FinalizeWriteStream invokes the unary FinalizeWriteStream RPC.
// grpc.StaticMethod is placed ahead of the caller-supplied options.
func (c *storageWriteClient) FinalizeWriteStream(ctx context.Context, in *FinalizeWriteStreamRequest, opts ...grpc.CallOption) (*FinalizeWriteStreamResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := new(FinalizeWriteStreamResponse)
	if err := c.cc.Invoke(ctx, StorageWrite_FinalizeWriteStream_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// BatchCommitWriteStreams invokes the unary BatchCommitWriteStreams RPC.
// grpc.StaticMethod is placed ahead of the caller-supplied options.
func (c *storageWriteClient) BatchCommitWriteStreams(ctx context.Context, in *BatchCommitWriteStreamsRequest, opts ...grpc.CallOption) (*BatchCommitWriteStreamsResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := new(BatchCommitWriteStreamsResponse)
	if err := c.cc.Invoke(ctx, StorageWrite_BatchCommitWriteStreams_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// FlushRows invokes the unary FlushRows RPC. grpc.StaticMethod is placed
// ahead of the caller-supplied options.
func (c *storageWriteClient) FlushRows(ctx context.Context, in *FlushRowsRequest, opts ...grpc.CallOption) (*FlushRowsResponse, error) {
	callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
	resp := new(FlushRowsResponse)
	if err := c.cc.Invoke(ctx, StorageWrite_FlushRows_FullMethodName, in, resp, callOpts...); err != nil {
		return nil, err
	}
	return resp, nil
}
// StorageWriteServer is the server API for StorageWrite service.
// All implementations should embed UnimplementedStorageWriteServer
// for forward compatibility.
//
// NOTE(review): several method comments below say BUFFERED / PENDING streams
// and the Finalize / BatchCommit / Flush RPCs return UNIMPLEMENTED, while the
// storage_write.proto header comment in this file says those stream types
// ship end-to-end. Confirm which is current; the fix belongs in the .proto
// comments because this file is generated.
type StorageWriteServer interface {
// CreateWriteStream returns a stream handle the caller binds to
// `AppendRows`. The emulator supports `_default` and `COMMITTED`
// streams (every flushed batch becomes immediately visible to
// readers); `BUFFERED` / `PENDING` requests fail with UNIMPLEMENTED
// until the deferred follow-up lands.
CreateWriteStream(context.Context, *CreateWriteStreamRequest) (*WriteStream, error)
// AppendRows is bidirectional-streaming. The first message on a
// stream MUST set `write_stream`; subsequent messages may leave it
// empty (the handler keeps the binding from the first message) or
// re-assert the same name. The handler commits each `proto_rows`
// batch through `Storage::AppendRows` synchronously and replies with
// one `AppendRowsResponse` per request before reading the next.
AppendRows(grpc.BidiStreamingServer[AppendRowsRequest, AppendRowsResponse]) error
// GetWriteStream returns the stream metadata the engine recorded at
// CreateWriteStream time. Used by clients to verify the stream
// type / schema before opening an AppendRows session.
GetWriteStream(context.Context, *GetWriteStreamRequest) (*WriteStream, error)
// FinalizeWriteStream marks a stream as closed. The emulator
// returns UNIMPLEMENTED today; the surface is reserved for the
// deferred BUFFERED / PENDING follow-up where finalize is the
// producer's signal to the BatchCommitWriteStreams pass.
FinalizeWriteStream(context.Context, *FinalizeWriteStreamRequest) (*FinalizeWriteStreamResponse, error)
// BatchCommitWriteStreams atomically commits a set of `PENDING`
// streams. The emulator returns UNIMPLEMENTED today; reserved for
// the deferred follow-up.
BatchCommitWriteStreams(context.Context, *BatchCommitWriteStreamsRequest) (*BatchCommitWriteStreamsResponse, error)
// FlushRows advances the visibility offset on a `BUFFERED` stream.
// The emulator returns UNIMPLEMENTED today; reserved for the
// deferred follow-up.
FlushRows(context.Context, *FlushRowsRequest) (*FlushRowsResponse, error)
}
// UnimplementedStorageWriteServer supplies a codes.Unimplemented stub for
// every StorageWriteServer method. Embed it so that implementations stay
// forward compatible when methods are added to the service.
//
// NOTE: embed it by value rather than by pointer; a nil embedded pointer
// would be dereferenced when one of the stub methods is called.
type UnimplementedStorageWriteServer struct{}

// CreateWriteStream reports codes.Unimplemented.
func (UnimplementedStorageWriteServer) CreateWriteStream(context.Context, *CreateWriteStreamRequest) (*WriteStream, error) {
	err := status.Error(codes.Unimplemented, "method CreateWriteStream not implemented")
	return nil, err
}

// AppendRows reports codes.Unimplemented.
func (UnimplementedStorageWriteServer) AppendRows(grpc.BidiStreamingServer[AppendRowsRequest, AppendRowsResponse]) error {
	err := status.Error(codes.Unimplemented, "method AppendRows not implemented")
	return err
}

// GetWriteStream reports codes.Unimplemented.
func (UnimplementedStorageWriteServer) GetWriteStream(context.Context, *GetWriteStreamRequest) (*WriteStream, error) {
	err := status.Error(codes.Unimplemented, "method GetWriteStream not implemented")
	return nil, err
}

// FinalizeWriteStream reports codes.Unimplemented.
func (UnimplementedStorageWriteServer) FinalizeWriteStream(context.Context, *FinalizeWriteStreamRequest) (*FinalizeWriteStreamResponse, error) {
	err := status.Error(codes.Unimplemented, "method FinalizeWriteStream not implemented")
	return nil, err
}

// BatchCommitWriteStreams reports codes.Unimplemented.
func (UnimplementedStorageWriteServer) BatchCommitWriteStreams(context.Context, *BatchCommitWriteStreamsRequest) (*BatchCommitWriteStreamsResponse, error) {
	err := status.Error(codes.Unimplemented, "method BatchCommitWriteStreams not implemented")
	return nil, err
}

// FlushRows reports codes.Unimplemented.
func (UnimplementedStorageWriteServer) FlushRows(context.Context, *FlushRowsRequest) (*FlushRowsResponse, error) {
	err := status.Error(codes.Unimplemented, "method FlushRows not implemented")
	return nil, err
}

// testEmbeddedByValue is the marker method RegisterStorageWriteServer calls to
// detect a nil pointer embedding at registration time.
func (UnimplementedStorageWriteServer) testEmbeddedByValue() {}
// UnsafeStorageWriteServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to StorageWriteServer will
// result in compilation errors.
type UnsafeStorageWriteServer interface {
// mustEmbedUnimplementedStorageWriteServer is unexported, so only types
// that embed an implementation from this package can satisfy the interface.
mustEmbedUnimplementedStorageWriteServer()
}
// RegisterStorageWriteServer registers srv with s under
// StorageWrite_ServiceDesc.
func RegisterStorageWriteServer(s grpc.ServiceRegistrar, srv StorageWriteServer) {
	// Calling the marker method here panics at registration time when
	// UnimplementedStorageWriteServer was embedded by pointer and is nil,
	// instead of later when an unimplemented method is invoked during I/O.
	type byValueEmbedder interface{ testEmbeddedByValue() }
	if probe, ok := srv.(byValueEmbedder); ok {
		probe.testEmbeddedByValue()
	}
	s.RegisterService(&StorageWrite_ServiceDesc, srv)
}
// _StorageWrite_CreateWriteStream_Handler decodes a CreateWriteStreamRequest
// and dispatches it to srv, going through interceptor when one is installed.
func _StorageWrite_CreateWriteStream_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(CreateWriteStreamRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	// The server type assertion stays inside the closure so it only runs when
	// the method is actually invoked.
	invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(StorageWriteServer).CreateWriteStream(ctx, req.(*CreateWriteStreamRequest))
	}
	if interceptor == nil {
		return invoke(ctx, in)
	}
	info := &grpc.UnaryServerInfo{Server: srv, FullMethod: StorageWrite_CreateWriteStream_FullMethodName}
	return interceptor(ctx, in, info, invoke)
}
// _StorageWrite_AppendRows_Handler wraps the raw server stream in the generic
// typed stream and hands it to the server's AppendRows method.
func _StorageWrite_AppendRows_Handler(srv interface{}, stream grpc.ServerStream) error {
	typed := &grpc.GenericServerStream[AppendRowsRequest, AppendRowsResponse]{ServerStream: stream}
	return srv.(StorageWriteServer).AppendRows(typed)
}
// StorageWrite_AppendRowsServer is a type alias provided for backwards
// compatibility with existing code that references the prior non-generic
// stream type by name.
type StorageWrite_AppendRowsServer = grpc.BidiStreamingServer[AppendRowsRequest, AppendRowsResponse]
// _StorageWrite_GetWriteStream_Handler decodes a GetWriteStreamRequest and
// dispatches it to srv, going through interceptor when one is installed.
func _StorageWrite_GetWriteStream_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(GetWriteStreamRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	// The server type assertion stays inside the closure so it only runs when
	// the method is actually invoked.
	invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(StorageWriteServer).GetWriteStream(ctx, req.(*GetWriteStreamRequest))
	}
	if interceptor == nil {
		return invoke(ctx, in)
	}
	info := &grpc.UnaryServerInfo{Server: srv, FullMethod: StorageWrite_GetWriteStream_FullMethodName}
	return interceptor(ctx, in, info, invoke)
}
// _StorageWrite_FinalizeWriteStream_Handler decodes a
// FinalizeWriteStreamRequest and dispatches it to srv, going through
// interceptor when one is installed.
func _StorageWrite_FinalizeWriteStream_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(FinalizeWriteStreamRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	// The server type assertion stays inside the closure so it only runs when
	// the method is actually invoked.
	invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(StorageWriteServer).FinalizeWriteStream(ctx, req.(*FinalizeWriteStreamRequest))
	}
	if interceptor == nil {
		return invoke(ctx, in)
	}
	info := &grpc.UnaryServerInfo{Server: srv, FullMethod: StorageWrite_FinalizeWriteStream_FullMethodName}
	return interceptor(ctx, in, info, invoke)
}
// _StorageWrite_BatchCommitWriteStreams_Handler decodes a
// BatchCommitWriteStreamsRequest and dispatches it to srv, going through
// interceptor when one is installed.
func _StorageWrite_BatchCommitWriteStreams_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(BatchCommitWriteStreamsRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	// The server type assertion stays inside the closure so it only runs when
	// the method is actually invoked.
	invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(StorageWriteServer).BatchCommitWriteStreams(ctx, req.(*BatchCommitWriteStreamsRequest))
	}
	if interceptor == nil {
		return invoke(ctx, in)
	}
	info := &grpc.UnaryServerInfo{Server: srv, FullMethod: StorageWrite_BatchCommitWriteStreams_FullMethodName}
	return interceptor(ctx, in, info, invoke)
}
// _StorageWrite_FlushRows_Handler decodes a FlushRowsRequest and dispatches
// it to srv, going through interceptor when one is installed.
func _StorageWrite_FlushRows_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(FlushRowsRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	// The server type assertion stays inside the closure so it only runs when
	// the method is actually invoked.
	invoke := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(StorageWriteServer).FlushRows(ctx, req.(*FlushRowsRequest))
	}
	if interceptor == nil {
		return invoke(ctx, in)
	}
	info := &grpc.UnaryServerInfo{Server: srv, FullMethod: StorageWrite_FlushRows_FullMethodName}
	return interceptor(ctx, in, info, invoke)
}
// StorageWrite_ServiceDesc is the grpc.ServiceDesc for StorageWrite service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
//
// The unary RPCs are listed under Methods; AppendRows is the only entry
// under Streams. The client stub addresses it as Streams[0], so the position
// of that entry must not change.
var StorageWrite_ServiceDesc = grpc.ServiceDesc{
ServiceName: "bigquery_emulator.v1.StorageWrite",
HandlerType: (*StorageWriteServer)(nil),
Methods: []grpc.MethodDesc{
{
MethodName: "CreateWriteStream",
Handler: _StorageWrite_CreateWriteStream_Handler,
},
{
MethodName: "GetWriteStream",
Handler: _StorageWrite_GetWriteStream_Handler,
},
{
MethodName: "FinalizeWriteStream",
Handler: _StorageWrite_FinalizeWriteStream_Handler,
},
{
MethodName: "BatchCommitWriteStreams",
Handler: _StorageWrite_BatchCommitWriteStreams_Handler,
},
{
MethodName: "FlushRows",
Handler: _StorageWrite_FlushRows_Handler,
},
},
Streams: []grpc.StreamDesc{
{
StreamName: "AppendRows",
Handler: _StorageWrite_AppendRows_Handler,
// Both directions stream: AppendRows is bidirectional.
ServerStreams: true,
ClientStreams: true,
},
},
Metadata: "storage_write.proto",
}
// Package connectionfixture seeds EXTERNAL_QUERY snapshot files under
// $data_dir/external/connections/<conn_id>/.
package connectionfixture
import (
"encoding/json"
"errors"
"os"
"path/filepath"
"strings"
)
// ManifestEntry maps a query string or alias to a result filename.
// Query and Alias are omitted from the serialized form when empty; Result is
// always written.
type ManifestEntry struct {
// Query is the query string this entry is keyed by (optional).
Query string `json:"query,omitempty" yaml:"query,omitempty"`
// Alias is an alternative key for the entry (optional).
Alias string `json:"alias,omitempty" yaml:"alias,omitempty"`
// Result is the filename of the result snapshot for this entry.
Result string `json:"result" yaml:"result"`
}
// Manifest is the on-disk queries.yaml / queries.json shape: a single
// top-level "queries" list of entries.
type Manifest struct {
// Queries lists the query/alias to result-file mappings.
Queries []ManifestEntry `json:"queries" yaml:"queries"`
}
// ResultFile is schema + rows for one EXTERNAL_QUERY snapshot.
type ResultFile struct {
// Schema lists the output columns.
Schema []Column `json:"schema" yaml:"schema"`
// Rows holds one map per row.
// NOTE(review): presumably keyed by column name — confirm against the reader.
Rows []map[string]any `json:"rows" yaml:"rows"`
}
// Column is one output field in a fixture result.
type Column struct {
// Name is the column name.
Name string `json:"name" yaml:"name"`
// Type is the column type, stored as a string.
Type string `json:"type" yaml:"type"`
}
// CopyTree mirrors every non-directory entry found under srcDir into
// dataDir/external/connections/connID/, keeping relative paths.
//
// All three arguments are required. Directories are created with mode 0o750
// and files are written with mode 0o600. The walk stops at the first error,
// which is returned unchanged.
func CopyTree(dataDir, connID, srcDir string) error {
	if dataDir == "" || connID == "" || srcDir == "" {
		return errors.New("dataDir, connID, and srcDir are required")
	}
	dst := filepath.Join(dataDir, "external", "connections", connID)
	if err := os.MkdirAll(dst, 0o750); err != nil {
		return err
	}

	copyEntry := func(path string, d os.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case d.IsDir():
			// Directories are created on demand for the files they contain.
			return nil
		}
		rel, err := filepath.Rel(srcDir, path)
		if err != nil {
			return err
		}
		outPath := filepath.Join(dst, rel)
		// Defensive check: never write outside the connection directory.
		if !isPathWithin(outPath, dst) {
			return errors.New("fixture path escapes destination directory")
		}
		if err := os.MkdirAll(filepath.Dir(outPath), 0o750); err != nil {
			return err
		}
		data, err := os.ReadFile(path) //nolint:gosec // fixture path under srcDir
		if err != nil {
			return err
		}
		return os.WriteFile(outPath, data, 0o600) //nolint:gosec // outPath validated under dst
	}
	return filepath.WalkDir(srcDir, copyEntry)
}
// WriteInline materializes manifest + one result file under
// dataDir/external/connections/connID/.
//
// The manifest is written as queries.json and the result as resultName
// ("result.json" when resultName is empty). dataDir and connID are required.
//
// resultName is resolved relative to the connection directory and must stay
// inside it; a name such as "../x" is rejected with an error, matching the
// containment check CopyTree applies. Both payloads are marshalled before
// anything is written, so a marshalling failure leaves no files behind.
// Directories are created with mode 0o750 and files with mode 0o600.
func WriteInline(dataDir, connID string, manifest Manifest, resultName string, result ResultFile) error {
	if dataDir == "" || connID == "" {
		return errors.New("dataDir and connID are required")
	}
	if resultName == "" {
		resultName = "result.json"
	}
	root := filepath.Join(dataDir, "external", "connections", connID)
	resultPath := filepath.Join(root, resultName)
	// Reject result names that would land outside the connection directory.
	if !isPathWithin(resultPath, root) {
		return errors.New("result path escapes destination directory")
	}

	// Marshal everything up front so a bad payload cannot leave a manifest
	// on disk without its result file.
	manifestRaw, err := json.MarshalIndent(manifest, "", " ")
	if err != nil {
		return err
	}
	resultRaw, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		return err
	}

	// Creating the result's parent also creates root, and lets resultName
	// carry a subdirectory the same way CopyTree's relative paths can.
	if err := os.MkdirAll(filepath.Dir(resultPath), 0o750); err != nil {
		return err
	}
	if err := os.MkdirAll(root, 0o750); err != nil {
		return err
	}
	if err := os.WriteFile(filepath.Join(root, "queries.json"), manifestRaw, 0o600); err != nil {
		return err
	}
	return os.WriteFile(resultPath, resultRaw, 0o600)
}
// isPathWithin reports whether path, once made absolute, is root itself or
// lies beneath it. It returns false when either path cannot be made absolute
// or no relative path between them can be computed.
func isPathWithin(path, root string) bool {
	absPath, pathErr := filepath.Abs(path)
	absRoot, rootErr := filepath.Abs(root)
	if pathErr != nil || rootErr != nil {
		return false
	}
	rel, err := filepath.Rel(absRoot, absPath)
	if err != nil {
		return false
	}
	// A relative path that is ".." or starts with "../" climbs out of root.
	if rel == ".." {
		return false
	}
	parentPrefix := ".." + string(filepath.Separator)
	return !strings.HasPrefix(rel, parentPrefix)
}
// Package external materializes BigQuery external tables into the
// engine catalog by fetching GCS (fake-gcs), Google Sheets (fixture/live),
// or local snapshot sources and bulk-inserting parsed rows.
package external
import (
"context"
"errors"
"fmt"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
"github.com/vantaboard/bigquery-emulator/gateway/load"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
)
// TempDatasetID is the internal dataset ephemeral tableDefinitions are
// registered under when the query omits defaultDataset. The gateway
// sets this as default_dataset_id on the engine QueryRequest so
// unqualified table ids in SQL resolve. PrepareTableDefinitionsWith returns
// it whenever it is called with an empty defaultDataset and at least one
// definition.
const TempDatasetID = "_bq_external_temp"
// Materialize fetches external source bytes, parses them, and registers
// the destination table with rows in the engine catalog (WRITE_TRUNCATE).
// It is MaterializeWith called with no resolver.
func Materialize(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	target Target,
	cfg *bqtypes.ExternalDataConfiguration,
) error {
	var noResolver *Resolver
	return MaterializeWith(ctx, catalog, target, cfg, noResolver)
}
// MaterializeWith materializes using an optional per-source resolver.
//
// It rejects a nil catalog or cfg, validates cfg, and requires all three
// target ids. The schema comes from target.Schema when set and from
// cfg.Schema otherwise. The source is then fetched and parsed, and the
// parsed rows are registered in the catalog.
func MaterializeWith(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	target Target,
	cfg *bqtypes.ExternalDataConfiguration,
	resolver *Resolver,
) error {
	switch {
	case catalog == nil:
		return errors.New("external: nil CatalogClient")
	case cfg == nil:
		return errors.New("external: externalDataConfiguration is required")
	}
	if err := validateExternalConfig(cfg); err != nil {
		return err
	}
	if target.ProjectID == "" || target.DatasetID == "" || target.TableID == "" {
		return errors.New("external: project, dataset, and table id are required")
	}

	// The enclosing table's schema wins over the one in the config.
	schema := cfg.Schema
	if target.Schema != nil {
		schema = target.Schema
	}

	var skipRows int
	if csv := cfg.CsvOptions; csv != nil {
		skipRows = csv.SkipLeadingRows()
	}

	parsed, err := fetchAndParse(ctx, resolver, cfg, schema, skipRows)
	if err != nil {
		return err
	}
	return registerParsedRows(ctx, catalog, target, schema, parsed)
}
// Target names a catalog table to materialize. ProjectID, DatasetID and
// TableID must all be non-empty for MaterializeWith to accept the target.
type Target struct {
ProjectID string
DatasetID string
TableID string
// Schema from the enclosing Table resource; wins over config.Schema
// when both are set (permanent external table inserts).
Schema *bqtypes.TableSchema
}
// registerParsedRows replaces the target table in the catalog with the
// parsed data: it ensures the dataset exists, drops the table if a describe
// call succeeds, registers it with the resolved schema, and inserts the rows.
//
// The schema is taken from parsed.Schema, or from fallbackSchema when
// parsed.Schema is nil. A schema without fields is an error.
func registerParsedRows(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	target Target,
	fallbackSchema *bqtypes.TableSchema,
	parsed load.ParsedRows,
) error {
	tableSchema := schemaToProto(parsed.Schema)
	if tableSchema == nil {
		tableSchema = schemaToProto(fallbackSchema)
	}
	if tableSchema == nil || len(tableSchema.GetFields()) == 0 {
		return errors.New("external table requires schema or autodetect=true for CSV")
	}

	if err := ensureDataset(ctx, catalog, target.ProjectID, target.DatasetID); err != nil {
		return err
	}

	engineRef := &enginepb.TableRef{
		ProjectId: target.ProjectID,
		DatasetId: target.DatasetID,
		TableId:   target.TableID,
	}
	// Drop-then-register gives truncate semantics for an existing table.
	if tableExists(ctx, catalog, engineRef) {
		dropReq := &enginepb.DropTableRequest{Table: engineRef}
		if _, err := catalog.DropTable(ctx, dropReq); err != nil {
			return fmt.Errorf("external drop table: %w", err)
		}
	}
	registerReq := &enginepb.RegisterTableRequest{Table: engineRef, Schema: tableSchema}
	if _, err := catalog.RegisterTable(ctx, registerReq); err != nil {
		return fmt.Errorf("external register table: %w", err)
	}

	seedRef := seed.TableRef{
		ProjectID: target.ProjectID,
		DatasetID: target.DatasetID,
		TableID:   target.TableID,
	}
	if _, err := seed.NewCatalogApplier(catalog).InsertRows(ctx, seedRef, tableSchema, parsed.Rows); err != nil {
		return fmt.Errorf("external insert rows: %w", err)
	}
	return nil
}
// PrepareTableDefinitions materializes every ephemeral external table in
// defs. Returns the default dataset id the caller should forward to the
// engine when the query omitted defaultDataset. It is
// PrepareTableDefinitionsWith called with no resolver.
func PrepareTableDefinitions(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	projectID string,
	defs map[string]bqtypes.ExternalDataConfiguration,
	defaultDataset string,
) (string, error) {
	var noResolver *Resolver
	return PrepareTableDefinitionsWith(ctx, catalog, projectID, defs, defaultDataset, noResolver)
}
// PrepareTableDefinitionsWith materializes defs with an optional resolver.
//
// Each map key is used as the table id. Tables are placed in defaultDataset,
// or in TempDatasetID when defaultDataset is empty, and the dataset that was
// used is returned. With no definitions, defaultDataset is returned
// unchanged. The first materialization error aborts the loop and is returned
// with an empty dataset id.
func PrepareTableDefinitionsWith(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	projectID string,
	defs map[string]bqtypes.ExternalDataConfiguration,
	defaultDataset string,
	resolver *Resolver,
) (string, error) {
	if len(defs) == 0 {
		return defaultDataset, nil
	}
	datasetID := defaultDataset
	if datasetID == "" {
		datasetID = TempDatasetID
	}
	for tableID, def := range defs {
		// Take the address of a per-iteration copy, not of the loop variable.
		defCopy := def
		target := Target{
			ProjectID: projectID,
			DatasetID: datasetID,
			TableID:   tableID,
			Schema:    def.Schema,
		}
		if err := MaterializeWith(ctx, catalog, target, &defCopy, resolver); err != nil {
			return "", err
		}
	}
	return datasetID, nil
}
// isGoogleSheets reports whether cfg describes a Google Sheets source: the
// source format is GOOGLE_SHEETS (case-insensitive, surrounding space
// ignored), GoogleSheetsOptions is set, or any source URI contains
// "docs.google.com/spreadsheets".
func isGoogleSheets(cfg *bqtypes.ExternalDataConfiguration) bool {
	format := strings.TrimSpace(cfg.SourceFormat)
	if strings.EqualFold(format, "GOOGLE_SHEETS") || cfg.GoogleSheetsOptions != nil {
		return true
	}
	for i := range cfg.SourceURIs {
		if strings.Contains(cfg.SourceURIs[i], "docs.google.com/spreadsheets") {
			return true
		}
	}
	return false
}
// validateExternalConfig checks cfg before any fetch is attempted.
//
// Every source needs at least one URI. Google Sheets sources need nothing
// further. For other sources, Azure Blob and Google Drive URIs are rejected
// as unsupported, and Bigtable sources additionally have each URI validated.
func validateExternalConfig(cfg *bqtypes.ExternalDataConfiguration) error {
	sheets := isGoogleSheets(cfg)
	if len(cfg.SourceURIs) == 0 {
		if sheets {
			return errors.New("google sheets external table requires sourceUri")
		}
		return errors.New("external table requires at least one sourceUri")
	}
	if sheets {
		return nil
	}
	for _, uri := range cfg.SourceURIs {
		switch {
		case IsAzureBlobURI(uri):
			return UnsupportedAzureBlobError()
		case IsGoogleDriveURI(uri):
			return UnsupportedDriveError()
		}
	}
	if !isBigtable(cfg) {
		return nil
	}
	for _, uri := range cfg.SourceURIs {
		if err := ValidateBigtableURI(uri); err != nil {
			return err
		}
	}
	return nil
}
// parseBigtableExternal returns an empty row set for a Bigtable source. The
// given schema is used when it has fields; otherwise a single STRING column
// named "rowkey" is substituted. The error is always nil.
func parseBigtableExternal(schema *bqtypes.TableSchema) (load.ParsedRows, error) {
	out := load.ParsedRows{Schema: schema, Rows: []map[string]any{}}
	if schema == nil || len(schema.Fields) == 0 {
		out.Schema = &bqtypes.TableSchema{
			Fields: []bqtypes.TableFieldSchema{{Name: "rowkey", Type: "STRING"}},
		}
	}
	return out, nil
}
// fetchAndParse routes cfg to the parser for its source type: Google Sheets
// first, then Bigtable, and load.ParseExternalGCS for everything else.
func fetchAndParse(
	ctx context.Context,
	resolver *Resolver,
	cfg *bqtypes.ExternalDataConfiguration,
	schema *bqtypes.TableSchema,
	skipLeading int,
) (load.ParsedRows, error) {
	switch {
	case isGoogleSheets(cfg):
		return parseSheetsExternal(ctx, resolver, cfg, schema, skipLeading)
	case isBigtable(cfg):
		return parseBigtableExternal(schema)
	default:
		// Only the parsed rows and the error are needed here.
		rows, _, _, err := load.ParseExternalGCS(ctx, cfg, schema, skipLeading)
		return rows, err
	}
}
// ensureDataset asks a seed catalog applier to ensure projectID.datasetID
// exists, passing "US" as the final argument, and returns only the error.
func ensureDataset(ctx context.Context, catalog enginepb.CatalogClient, projectID, datasetID string) error {
	_, err := seed.NewCatalogApplier(catalog).EnsureDataset(ctx, projectID, datasetID, "US")
	return err
}
// tableExists reports whether DescribeTable succeeds for ref. Any error,
// whatever its cause, is reported as "does not exist".
func tableExists(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef) bool {
	req := &enginepb.DescribeTableRequest{Table: ref}
	if _, err := catalog.DescribeTable(ctx, req); err != nil {
		return false
	}
	return true
}
// schemaToProto converts a REST table schema into its engine proto form.
// A nil schema converts to nil; a schema without fields converts to a
// non-nil message with an empty field list.
func schemaToProto(s *bqtypes.TableSchema) *enginepb.TableSchema {
	if s == nil {
		return nil
	}
	fields := make([]*enginepb.FieldSchema, len(s.Fields))
	for i := range s.Fields {
		fields[i] = fieldToProto(s.Fields[i])
	}
	return &enginepb.TableSchema{Fields: fields}
}
// fieldToProto converts one field, copying name, type, mode and description
// and converting nested fields recursively. Fields stays nil when f has no
// nested fields.
func fieldToProto(f bqtypes.TableFieldSchema) *enginepb.FieldSchema {
	converted := &enginepb.FieldSchema{
		Name:        f.Name,
		Type:        f.Type,
		Mode:        f.Mode,
		Description: f.Description,
	}
	if n := len(f.Fields); n > 0 {
		converted.Fields = make([]*enginepb.FieldSchema, 0, n)
		for i := range f.Fields {
			converted.Fields = append(converted.Fields, fieldToProto(f.Fields[i]))
		}
	}
	return converted
}
// LoadSourceConfig loads external source resolution rules for dataDir by
// delegating to sourceconfig.Load.
func LoadSourceConfig(dataDir string) (*sourceconfig.Config, error) {
	cfg, err := sourceconfig.Load(dataDir)
	return cfg, err
}
package external
import (
"context"
_ "embed"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
"github.com/vantaboard/bigquery-emulator/gateway/load"
)
// Public sample sheet (Example Spreadsheet, Class Data tab) used for fixture
// and opt-in live conformance tests.
const (
// ClassDataSheetDocID is the document id of the sample sheet; fixture
// lookups for this id are served from the embedded CSV.
ClassDataSheetDocID = "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms"
// classDataSheetGID is the gid value used when building the live CSV
// export URL.
classDataSheetGID = "0"
)
// classDataFixtureCSV holds the contents of
// fixtures/google_sheets/class_data.csv, embedded at build time. It is the
// fixture returned for ClassDataSheetDocID.
//
//go:embed fixtures/google_sheets/class_data.csv
var classDataFixtureCSV []byte
// Resolver carries per-source configuration for materialization.
// Its methods accept a nil receiver and a nil cfg, falling back to defaults.
type Resolver struct {
// cfg holds the source resolution rules; may be nil.
cfg *sourceconfig.Config
}
// NewResolver returns a resolver; nil cfg uses package defaults.
func NewResolver(cfg *sourceconfig.Config) *Resolver {
	r := new(Resolver)
	r.cfg = cfg
	return r
}
// modeSheets returns the resolution mode for the given Google Sheets doc id.
// Without a resolver or configuration it falls back to ModeFixture.
func (r *Resolver) modeSheets(docID string) sourceconfig.Mode {
	if r != nil && r.cfg != nil {
		return r.cfg.ResolveGoogleSheets(docID)
	}
	return sourceconfig.ModeFixture
}
// fixturePath returns <fixture root>/google_sheets/<docID>/<name>, or the
// empty string when the resolver has no configuration.
func (r *Resolver) fixturePath(docID, name string) string {
	if r != nil && r.cfg != nil {
		return filepath.Join(r.cfg.FixtureRoot(), "google_sheets", docID, name)
	}
	return ""
}
// fetchGoogleSheetsCSV returns the CSV bytes for the sheet named by the
// first source URI in cfg; any further URIs are ignored.
//
// The doc id is extracted from that URI and its resolution mode selects the
// source: live fetches the export over HTTP, local is rejected as
// unsupported, and every other mode reads a fixture snapshot.
func fetchGoogleSheetsCSV(
	ctx context.Context,
	r *Resolver,
	cfg *bqtypes.ExternalDataConfiguration,
) ([]byte, error) {
	if len(cfg.SourceURIs) == 0 {
		return nil, errors.New("google sheets external table requires sourceUri")
	}
	firstURI := cfg.SourceURIs[0]
	docID := sourceconfig.ExtractSheetDocID(firstURI)
	if docID == "" {
		return nil, fmt.Errorf("could not parse Google Sheets doc id from %q", firstURI)
	}
	mode := r.modeSheets(docID)
	if mode == sourceconfig.ModeLive {
		return fetchLiveSheetsCSV(ctx, docID, cfg.GoogleSheetsOptions)
	}
	if mode == sourceconfig.ModeLocal {
		return nil, errors.New("google sheets local mode is not supported; use fixture or live")
	}
	return loadFixtureSheetsCSV(r, docID)
}
// loadFixtureSheetsCSV returns the fixture CSV for docID.
//
// The embedded sample is returned for ClassDataSheetDocID. For any other id,
// and only when the resolver has a non-empty DataDir, the files data.csv,
// class_data.csv and sheet.csv are tried in that order under the doc's
// fixture directory; the first one that reads successfully wins. Read errors
// are treated as "not present".
func loadFixtureSheetsCSV(r *Resolver, docID string) ([]byte, error) {
	if docID == ClassDataSheetDocID {
		return classDataFixtureCSV, nil
	}
	hasDataDir := r != nil && r.cfg != nil && r.cfg.DataDir != ""
	if hasDataDir {
		candidates := [...]string{"data.csv", "class_data.csv", "sheet.csv"}
		for _, name := range candidates {
			raw, err := os.ReadFile(r.fixturePath(docID, name)) //nolint:gosec // operator data dir
			if err != nil {
				continue
			}
			return raw, nil
		}
	}
	return nil, fmt.Errorf("no fixture snapshot for Google Sheets doc %s", docID)
}
// fetchLiveSheetsCSV downloads the CSV export of docID from
// docs.google.com using http.DefaultClient and returns the response body.
//
// The export always requests gid classDataSheetGID. opts is accepted but not
// used: a Range such as "Class Data!A1:F31" is not translated into a gid, so
// only the sheet with that gid (the public sample's first tab) is reachable.
// A non-200 response becomes an error carrying the status code and up to 512
// bytes of the body.
func fetchLiveSheetsCSV(
	ctx context.Context,
	docID string,
	opts *bqtypes.GoogleSheetsOptions,
) ([]byte, error) {
	exportURL := "https://docs.google.com/spreadsheets/d/" + url.PathEscape(docID) +
		"/export?format=csv&gid=" + url.QueryEscape(classDataSheetGID)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, exportURL, nil)
	if err != nil {
		return nil, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fetch google sheets %s: %w", docID, err)
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode == http.StatusOK {
		return io.ReadAll(resp.Body)
	}
	// Include a short prefix of the body to aid diagnosis; a read failure
	// here is ignored because the status error is what matters.
	snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
	return nil, fmt.Errorf("fetch google sheets %s: HTTP %d: %s",
		docID, resp.StatusCode, strings.TrimSpace(string(snippet)))
}
// parseSheetsExternal fetches the sheet as CSV and parses it with
// load.ParseSource.
//
// Rows to skip: a positive GoogleSheetsOptions.SkipLeadingRows wins;
// otherwise skipLeading is used, with 0 replaced by 1.
func parseSheetsExternal(
	ctx context.Context,
	r *Resolver,
	cfg *bqtypes.ExternalDataConfiguration,
	schema *bqtypes.TableSchema,
	skipLeading int,
) (load.ParsedRows, error) {
	data, err := fetchGoogleSheetsCSV(ctx, r, cfg)
	if err != nil {
		return load.ParsedRows{}, err
	}
	sheetSkip := 0
	if sheetOpts := cfg.GoogleSheetsOptions; sheetOpts != nil {
		sheetSkip = sheetOpts.SkipLeadingRows()
	}
	skip := skipLeading
	switch {
	case sheetSkip > 0:
		skip = sheetSkip
	case skip == 0:
		skip = 1
	}
	return load.ParseSource("CSV", data, schema, skip, cfg.Autodetect)
}
// Package sourceconfig resolves external data sources to fixture, local, or
// live modes. Defaults favor deterministic offline behavior; live upstream
// fetches are strictly opt-in per source kind or identity.
package sourceconfig
import (
"errors"
"os"
"path/filepath"
"strings"
"gopkg.in/yaml.v3"
)
// Mode is how an external source identity is resolved at fetch time.
// The string values are the spellings parseMode matches case-insensitively.
type Mode string
const (
// ModeFixture reads committed snapshots under DataDir/external/fixtures/.
ModeFixture Mode = "fixture"
// ModeLocal uses fake-gcs (STORAGE_EMULATOR_HOST) or local file paths.
ModeLocal Mode = "local"
// ModeLive reaches a real upstream (credentials / network gated).
ModeLive Mode = "live"
)
// Kind classifies an external source for default-mode lookup.
type Kind string
const (
// KindGCS is a GCS source; Load defaults it to ModeLocal.
KindGCS Kind = "gcs"
// KindGoogleSheets is a Google Sheets source; Load defaults it to ModeFixture.
KindGoogleSheets Kind = "google_sheets"
// KindConnection is a connection source; Load defaults it to ModeFixture.
KindConnection Kind = "connection"
)
// Config holds per-source resolution rules loaded from DataDir and env vars.
type Config struct {
// DataDir is the directory passed to Load; may be empty.
DataDir string
// defaults by kind; overridden per source id in Sources.
defaults map[Kind]Mode
// sources maps a normalized source id to its explicit mode.
sources map[string]Mode
}
// fileShape is the YAML document layout of external_sources.yaml as decoded
// by loadYAMLFile.
type fileShape struct {
// Defaults maps a kind name to a mode name.
Defaults map[string]string `yaml:"defaults"`
// Sources maps a source id to its per-source entry.
Sources map[string]sourceEntry `yaml:"sources"`
}
// sourceEntry is one item under `sources` in external_sources.yaml.
type sourceEntry struct {
// Kind is decoded from the file; loadYAMLFile does not consult it.
Kind string `yaml:"kind"`
// Mode is the mode name applied to this source id.
Mode string `yaml:"mode"`
}
// Load returns a Config seeded with the built-in per-kind defaults (GCS is
// local; Google Sheets and connections are fixture), then layers
// <dataDir>/external_sources.yaml on top when dataDir is non-empty, and
// finally applies environment-variable overrides.
//
// With an empty dataDir no file is read and only the env overrides apply.
func Load(dataDir string) (*Config, error) {
	cfg := &Config{
		DataDir: dataDir,
		sources: make(map[string]Mode),
		defaults: map[Kind]Mode{
			KindGCS:          ModeLocal,
			KindGoogleSheets: ModeFixture,
			KindConnection:   ModeFixture,
		},
	}

	if dataDir != "" {
		loadErr := cfg.loadYAMLFile(dataDir)
		if loadErr != nil {
			return nil, loadErr
		}
	}

	// Env overrides are applied last so they win over the file.
	cfg.applyEnvOverrides()
	return cfg, nil
}
// loadYAMLFile merges <dataDir>/external_sources.yaml into c.
//
// A missing file is not an error. Entries whose mode string is not
// recognized by parseMode are ignored. Source ids are stored normalized.
func (c *Config) loadYAMLFile(dataDir string) error {
	raw, readErr := os.ReadFile(filepath.Join(dataDir, "external_sources.yaml")) //nolint:gosec // operator-controlled data dir
	switch {
	case errors.Is(readErr, os.ErrNotExist):
		return nil
	case readErr != nil:
		return readErr
	}

	var parsed fileShape
	if decodeErr := yaml.Unmarshal(raw, &parsed); decodeErr != nil {
		return decodeErr
	}

	for kindName, modeName := range parsed.Defaults {
		mode := parseMode(modeName)
		if mode == "" {
			continue
		}
		c.defaults[parseKind(kindName)] = mode
	}

	for sourceID, entry := range parsed.Sources {
		mode := parseMode(entry.Mode)
		if mode == "" {
			continue
		}
		c.sources[normalizeID(sourceID)] = mode
	}
	return nil
}
// applyEnvOverrides adjusts the per-kind defaults from environment variables:
//
//   - BIGQUERY_EMULATOR_LIVE_SHEETS: when truthy, Google Sheets default to live.
//   - BIGQUERY_EMULATOR_EXTERNAL_GCS_MODE: mode name for the GCS default.
//   - BIGQUERY_EMULATOR_EXTERNAL_CONNECTIONS_MODE: mode name for the
//     connection default.
//
// Empty or unrecognized mode names leave the existing default untouched.
func (c *Config) applyEnvOverrides() {
	if truthy(os.Getenv("BIGQUERY_EMULATOR_LIVE_SHEETS")) {
		c.defaults[KindGoogleSheets] = ModeLive
	}

	modeVars := []struct {
		env  string
		kind Kind
	}{
		{env: "BIGQUERY_EMULATOR_EXTERNAL_GCS_MODE", kind: KindGCS},
		{env: "BIGQUERY_EMULATOR_EXTERNAL_CONNECTIONS_MODE", kind: KindConnection},
	}
	for _, mv := range modeVars {
		// parseMode trims whitespace itself and returns "" for blank input.
		if mode := parseMode(os.Getenv(mv.env)); mode != "" {
			c.defaults[mv.kind] = mode
		}
	}
}
// truthy reports whether s, ignoring case and surrounding whitespace, is one
// of "1", "true", "yes", or "on".
func truthy(s string) bool {
	normalized := strings.ToLower(strings.TrimSpace(s))
	return normalized == "1" ||
		normalized == "true" ||
		normalized == "yes" ||
		normalized == "on"
}
// parseMode converts a mode name (case-insensitive, surrounding whitespace
// ignored) to its Mode constant. Unrecognized names yield the empty Mode.
func parseMode(s string) Mode {
	candidate := Mode(strings.ToLower(strings.TrimSpace(s)))
	switch candidate {
	case ModeFixture, ModeLocal, ModeLive:
		return candidate
	}
	return ""
}
// parseKind maps a kind name (case-insensitive, surrounding whitespace
// ignored) and its accepted aliases to a Kind constant. Unknown names are
// returned as Kind(s) with the input left exactly as given.
func parseKind(s string) Kind {
	name := strings.ToLower(strings.TrimSpace(s))
	if name == "gcs" {
		return KindGCS
	}
	if name == "google_sheets" || name == "googlesheets" || name == "sheets" {
		return KindGoogleSheets
	}
	if name == "connection" || name == "connections" {
		return KindConnection
	}
	return Kind(s)
}
// normalizeID canonicalizes a source id for map lookups by trimming
// surrounding whitespace and lower-casing it.
func normalizeID(id string) string {
	trimmed := strings.TrimSpace(id)
	return strings.ToLower(trimmed)
}
// ResolveGCS returns the mode for a gs:// URI.
//
// Lookup order: an entry for the full URI, then an entry for the URI's
// bucket, then the GCS default. A nil Config resolves to ModeLocal.
func (c *Config) ResolveGCS(uri string) Mode {
	if c == nil {
		return ModeLocal
	}

	keys := []string{uri}
	if bucket := gcsBucket(uri); bucket != "" {
		keys = append(keys, bucket)
	}
	for _, key := range keys {
		if mode, found := c.sources[normalizeID(key)]; found {
			return mode
		}
	}
	return c.defaults[KindGCS]
}
// ResolveGoogleSheets returns the mode for a Sheets doc id or URL.
//
// The lookup key is the doc id extracted by ExtractSheetDocID, or the raw
// input when no id can be extracted. Without a per-source entry the Google
// Sheets default applies. A nil Config resolves to ModeFixture.
func (c *Config) ResolveGoogleSheets(docOrURL string) Mode {
	if c == nil {
		return ModeFixture
	}

	key := docOrURL
	if docID := ExtractSheetDocID(docOrURL); docID != "" {
		key = docID
	}

	mode, found := c.sources[normalizeID(key)]
	if !found {
		mode = c.defaults[KindGoogleSheets]
	}
	return mode
}
// ResolveConnection returns the mode for a connection resource name or id.
//
// Lookup order: an entry for the short connection id, then an entry for the
// name as given, then the connection default. A nil Config resolves to
// ModeFixture.
func (c *Config) ResolveConnection(name string) Mode {
	if c == nil {
		return ModeFixture
	}

	for _, key := range []string{connectionID(name), name} {
		if mode, found := c.sources[normalizeID(key)]; found {
			return mode
		}
	}
	return c.defaults[KindConnection]
}
// FixtureRoot returns <DataDir>/external/fixtures, the directory for
// committed external snapshots. It returns "" when the Config is nil or has
// no DataDir.
func (c *Config) FixtureRoot() string {
	if c != nil && c.DataDir != "" {
		return filepath.Join(c.DataDir, "external", "fixtures")
	}
	return ""
}
// GCSCacheRoot returns <DataDir>/external/gcs-cache, the directory where
// gs:// objects are materialized for engine LOAD/EXPORT and offline
// snapshots. It returns "" when the Config is nil or has no DataDir.
func (c *Config) GCSCacheRoot() string {
	if c != nil && c.DataDir != "" {
		return filepath.Join(c.DataDir, "external", "gcs-cache")
	}
	return ""
}
// ConnectionFixtureRoot returns <DataDir>/external/connections, the directory
// of fixture SQL result files for EXTERNAL_QUERY. It returns "" when the
// Config is nil or has no DataDir.
func (c *Config) ConnectionFixtureRoot() string {
	if c != nil && c.DataDir != "" {
		return filepath.Join(c.DataDir, "external", "connections")
	}
	return ""
}
// gcsBucket returns the bucket component of a gs:// URI: the text between
// the "gs://" prefix and the first "/".
//
// Surrounding whitespace is ignored, matching normalizeID. A bucket-only URI
// such as "gs://bucket" yields "bucket", so bucket-level source entries also
// apply to it. It returns "" when there is no bucket component (for example
// "gs://" or "gs:///object").
func gcsBucket(uri string) string {
	rest := strings.TrimPrefix(strings.TrimSpace(uri), "gs://")
	// Cut returns the whole remainder when there is no "/", which covers the
	// bucket-only form.
	bucket, _, _ := strings.Cut(rest, "/")
	return bucket
}
// connectionID reduces a connection reference to its short id: the text
// after the last "/" when one is present, otherwise the text after the last
// "." (the EXTERNAL_QUERY connection arg may be region.id), otherwise the
// trimmed name itself.
func connectionID(name string) string {
	trimmed := strings.TrimSpace(name)
	// "/" is tried first, so a slash-separated resource path wins even when
	// its final segment contains dots.
	for _, sep := range []string{"/", "."} {
		if pos := strings.LastIndex(trimmed, sep); pos >= 0 {
			return trimmed[pos+1:]
		}
	}
	return trimmed
}
// ExtractSheetDocID parses a Google Sheets URL or bare doc id.
//
// For input containing "/d/", the id is the text after the first "/d/" up to
// the next "/", "#", or "?" (or the end of the string). Input without any
// "/" is treated as a bare id and returned trimmed. Anything else, including
// blank input, yields "".
func ExtractSheetDocID(uri string) string {
	trimmed := strings.TrimSpace(uri)

	const marker = "/d/"
	if start := strings.Index(trimmed, marker); start >= 0 {
		docID := trimmed[start+len(marker):]
		if end := strings.IndexAny(docID, "/#?"); end >= 0 {
			docID = docID[:end]
		}
		return docID
	}

	// A string with any "/" (which includes every "scheme://" URL) is not a
	// bare id.
	if strings.Contains(trimmed, "/") {
		return ""
	}
	return trimmed
}
package external
import (
"errors"
"fmt"
"slices"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
const sourceFormatBigtable = "BIGTABLE"
// IsBigtableSourceFormat reports whether format, ignoring case and
// surrounding whitespace, is BIGTABLE.
func IsBigtableSourceFormat(format string) bool {
	trimmed := strings.TrimSpace(format)
	return strings.EqualFold(trimmed, sourceFormatBigtable)
}
// IsBigtableSourceURI reports whether uri is a Bigtable REST source URI:
// after trimming whitespace it either starts with
// "https://bigtable.googleapis.com/" or contains "googleapis.com/bigtable/".
func IsBigtableSourceURI(uri string) bool {
	trimmed := strings.TrimSpace(uri)
	if strings.HasPrefix(trimmed, "https://bigtable.googleapis.com/") {
		return true
	}
	return strings.Contains(trimmed, "googleapis.com/bigtable/")
}
// ValidateBigtableURI checks the canonical Bigtable external URI shape: the
// URI must be recognized by IsBigtableSourceURI and contain the
// "/projects/", "/instances/", and "/tables/" segments. It returns nil for a
// valid URI and a descriptive error otherwise.
func ValidateBigtableURI(uri string) error {
	if !IsBigtableSourceURI(uri) {
		return fmt.Errorf("invalid Bigtable sourceUri: %q", uri)
	}
	for _, segment := range []string{"/projects/", "/instances/", "/tables/"} {
		if strings.Contains(uri, segment) {
			continue
		}
		return fmt.Errorf(
			"invalid Bigtable sourceUri (expected .../projects/P/instances/I/tables/T): %q",
			uri,
		)
	}
	return nil
}
// IsAzureBlobURI reports Azure Blob / ADLS URIs the UI may submit. The check
// is case-insensitive and matches an "azure://" prefix or a
// ".blob.core.windows.net/" / ".dfs.core.windows.net/" host fragment.
func IsAzureBlobURI(uri string) bool {
	lowered := strings.TrimSpace(strings.ToLower(uri))
	if strings.HasPrefix(lowered, "azure://") {
		return true
	}
	for _, hostFragment := range []string{".blob.core.windows.net/", ".dfs.core.windows.net/"} {
		if strings.Contains(lowered, hostFragment) {
			return true
		}
	}
	return false
}
// IsGoogleDriveURI reports non-Sheets Google Drive URIs: the trimmed URI
// contains "drive.google.com/" and does not contain "spreadsheets".
func IsGoogleDriveURI(uri string) bool {
	trimmed := strings.TrimSpace(uri)
	if strings.Contains(trimmed, "spreadsheets") {
		return false
	}
	return strings.Contains(trimmed, "drive.google.com/")
}
// UnsupportedAzureBlobError returns a new error stating that Azure Blob
// Storage external tables are not supported by the emulator.
func UnsupportedAzureBlobError() error {
	const msg = "azure blob storage external tables are not supported in the emulator"
	return errors.New(msg)
}
// UnsupportedDriveError returns a new error stating that Google Drive file
// URIs are not supported by the emulator and pointing at GOOGLE_SHEETS for
// spreadsheets.
func UnsupportedDriveError() error {
	const msg = "google drive external tables are not supported in the emulator (use GOOGLE_SHEETS for spreadsheets)"
	return errors.New(msg)
}
// isBigtable reports whether cfg describes a Bigtable source, judged by its
// source format or by any of its source URIs being a Bigtable URI.
func isBigtable(cfg *bqtypes.ExternalDataConfiguration) bool {
	if IsBigtableSourceFormat(cfg.SourceFormat) {
		return true
	}
	for _, uri := range cfg.SourceURIs {
		if IsBigtableSourceURI(uri) {
			return true
		}
	}
	return false
}
// Package extract implements synchronous BigQuery EXTRACT jobs.
package extract
import (
"bytes"
"compress/gzip"
"context"
"encoding/csv"
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/load"
)
const listPageSize = 10_000
// Result captures extract-job statistics.
type Result struct {
// InputBytes is set by Execute to the size of the uploaded payload
// (measured after optional gzip compression).
InputBytes int64
// DestinationURIFileCounts has one entry per destination URI, in request
// order; Execute records 1 for each successful upload.
DestinationURIFileCounts []int64
}
// Execute runs a synchronous EXTRACT job.
//
// It validates cfg, describes the source table and reads all of its rows via
// catalog, serializes them in cfg.DestinationFormat ("CSV" when unset),
// optionally gzips the payload, and uploads the same payload to every
// destination URI. defaultProject is used when the source table reference
// carries no project id.
//
// The returned Result reports the uploaded payload size and one file per
// destination. Any failure yields a zero Result and the error.
func Execute(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationExtract,
	defaultProject string,
) (Result, error) {
	switch {
	case cfg == nil:
		return Result{}, errors.New("extract configuration is required")
	case cfg.SourceTable == nil || cfg.SourceTable.TableID == "":
		return Result{}, errors.New("sourceTable.tableId is required")
	case len(cfg.DestinationURIs) == 0:
		return Result{}, errors.New("destinationUris is required")
	}

	source := cfg.SourceTable
	tableRef := &enginepb.TableRef{
		ProjectId: source.ProjectID,
		DatasetId: source.DatasetID,
		TableId:   source.TableID,
	}
	if tableRef.ProjectId == "" {
		tableRef.ProjectId = defaultProject
	}

	described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
	if err != nil {
		return Result{}, fmt.Errorf("source table: %w", err)
	}
	tableSchema := described.GetSchema()

	tableRows, err := listAllRows(ctx, catalog, tableRef)
	if err != nil {
		return Result{}, err
	}

	destFormat := cfg.DestinationFormat
	if destFormat == "" {
		destFormat = "CSV"
	}
	body, mimeType, err := serializeRows(tableSchema, tableRows, destFormat)
	if err != nil {
		return Result{}, err
	}
	body, mimeType, err = maybeGzip(cfg.Compression, body, mimeType)
	if err != nil {
		return Result{}, err
	}

	fileCounts, err := uploadDestinations(ctx, cfg.DestinationURIs, mimeType, body)
	if err != nil {
		return Result{}, err
	}
	return Result{
		InputBytes:               int64(len(body)),
		DestinationURIFileCounts: fileCounts,
	}, nil
}
// maybeGzip gzip-compresses payload when compression is "GZIP"
// (case-insensitive) and reports "application/gzip" as the content type. For
// any other compression value the payload and content type pass through
// unchanged.
func maybeGzip(compression string, payload []byte, contentType string) ([]byte, string, error) {
	if strings.EqualFold(compression, "GZIP") {
		compressed, err := gzipBytes(payload)
		if err != nil {
			return nil, "", err
		}
		return compressed, "application/gzip", nil
	}
	return payload, contentType, nil
}
// uploadDestinations writes the same payload to every URI in uris via
// load.PutGCS and returns a per-URI file count (always 1 on success). It
// stops at the first failed upload and returns that error.
func uploadDestinations(ctx context.Context, uris []string, contentType string, payload []byte) ([]int64, error) {
	fileCounts := make([]int64, len(uris))
	for idx := range uris {
		putErr := load.PutGCS(ctx, uris[idx], contentType, payload)
		if putErr != nil {
			return nil, putErr
		}
		fileCounts[idx] = 1
	}
	return fileCounts, nil
}
// listAllRows pages through catalog.ListRows for ref, listPageSize rows at a
// time, and returns every row collected. Paging stops when a page comes back
// empty or when the running offset reaches the reported total row count.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef,
) ([]*enginepb.DataRow, error) {
	var (
		collected []*enginepb.DataRow
		offset    int64
	)
	for {
		page, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
			Table:      ref,
			StartIndex: offset,
			MaxResults: listPageSize,
		})
		if err != nil {
			return nil, err
		}

		batch := page.GetRows()
		if len(batch) == 0 {
			return collected, nil
		}
		collected = append(collected, batch...)
		offset += int64(len(batch))

		if offset >= page.GetTotalRows() {
			return collected, nil
		}
	}
}
// serializeRows encodes rows in the requested destination format and returns
// the payload with its content type. The format is matched
// case-insensitively; "CSV" and "NEWLINE_DELIMITED_JSON" are supported and
// anything else is an error.
func serializeRows(schema *enginepb.TableSchema, rows []*enginepb.DataRow, format string) ([]byte, string, error) {
	normalized := strings.ToUpper(format)
	if normalized == "CSV" {
		return serializeCSV(schema, rows)
	}
	if normalized == "NEWLINE_DELIMITED_JSON" {
		return serializeNDJSON(schema, rows)
	}
	return nil, "", fmt.Errorf("unsupported destinationFormat %q", format)
}
// serializeCSV renders rows as CSV with a header line of the schema's field
// names and returns the bytes with content type "text/csv".
//
// Every record has exactly one column per schema field: cells beyond the
// schema width are dropped and missing cells are written as empty strings.
func serializeCSV(schema *enginepb.TableSchema, rows []*enginepb.DataRow) ([]byte, string, error) {
	columns := schema.GetFields()

	var out bytes.Buffer
	writer := csv.NewWriter(&out)

	names := make([]string, 0, len(columns))
	for _, col := range columns {
		names = append(names, col.GetName())
	}
	if err := writer.Write(names); err != nil {
		return nil, "", err
	}

	for _, row := range rows {
		cells := row.GetCells()
		record := make([]string, len(columns))
		for i := 0; i < len(columns) && i < len(cells); i++ {
			record[i] = cellString(cells[i])
		}
		if err := writer.Write(record); err != nil {
			return nil, "", err
		}
	}

	// Flush reports nothing itself; any buffered write failure surfaces
	// through Error.
	writer.Flush()
	if err := writer.Error(); err != nil {
		return nil, "", err
	}
	return out.Bytes(), "text/csv", nil
}
// serializeNDJSON renders rows as newline-delimited JSON, one object per row
// keyed by field name, and returns the bytes with content type
// "application/json". Fields with no corresponding cell are omitted from the
// object.
func serializeNDJSON(schema *enginepb.TableSchema, rows []*enginepb.DataRow) ([]byte, string, error) {
	columns := schema.GetFields()

	var out bytes.Buffer
	for _, row := range rows {
		cells := row.GetCells()
		record := make(map[string]any, len(columns))
		for i := 0; i < len(columns) && i < len(cells); i++ {
			col := columns[i]
			record[col.GetName()] = cellJSONValue(cells[i], col.GetType())
		}

		encoded, err := json.Marshal(record)
		if err != nil {
			return nil, "", err
		}
		out.Write(encoded)
		out.WriteByte('\n')
	}
	return out.Bytes(), "application/json", nil
}
// cellString returns the cell's string value, or "" for a nil or NULL cell.
func cellString(c *enginepb.Cell) string {
	if c != nil && !c.GetNullValue() {
		return c.GetStringValue()
	}
	return ""
}
// cellJSONValue converts a cell to the Go value that is JSON-encoded for it,
// based on the column type name typ (case-insensitive):
//
//   - nil or NULL cells become nil;
//   - INTEGER/INT64 become int64 when the text parses as base-10;
//   - FLOAT/FLOAT64/NUMERIC/BIGNUMERIC become float64 when the text parses;
//   - BOOLEAN/BOOL become true only for the exact text "true";
//   - everything else, including unparseable numbers, stays a string.
func cellJSONValue(c *enginepb.Cell, typ string) any {
	if c == nil || c.GetNullValue() {
		return nil
	}

	raw := c.GetStringValue()
	switch strings.ToUpper(typ) {
	case "INTEGER", "INT64":
		parsed, err := strconv.ParseInt(raw, 10, 64)
		if err != nil {
			return raw
		}
		return parsed
	case "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC":
		parsed, err := strconv.ParseFloat(raw, 64)
		if err != nil {
			return raw
		}
		return parsed
	case "BOOLEAN", "BOOL":
		return raw == "true"
	default:
		return raw
	}
}
// gzipBytes returns data compressed as a single gzip stream using the
// default compression level.
func gzipBytes(data []byte) ([]byte, error) {
	compressed := new(bytes.Buffer)
	writer := gzip.NewWriter(compressed)

	_, writeErr := writer.Write(data)
	if writeErr != nil {
		return nil, writeErr
	}
	// Close flushes the remaining data and writes the gzip footer.
	closeErr := writer.Close()
	if closeErr != nil {
		return nil, closeErr
	}
	return compressed.Bytes(), nil
}
// FormatStatistics maps Result into jobs.ExtractStatistics, rendering the
// input byte count and each per-destination file count as base-10 strings.
func FormatStatistics(r Result) *jobs.ExtractStatistics {
	fileCounts := make([]string, 0, len(r.DestinationURIFileCounts))
	for _, n := range r.DestinationURIFileCounts {
		fileCounts = append(fileCounts, strconv.FormatInt(n, 10))
	}

	stats := &jobs.ExtractStatistics{
		InputBytes:               strconv.FormatInt(r.InputBytes, 10),
		DestinationURIFileCounts: fileCounts,
	}
	return stats
}
// Package gateway runs the BigQuery emulator's REST gateway and manages
// the lifecycle of the C++ engine subprocess.
//
// The flow mirrors cloud-spanner-emulator's gateway:
//
// 1. Optionally spawn the engine binary, wiring its stdout/stderr.
// 2. Wait for the engine's gRPC port to become reachable.
// 3. Start the HTTP server that serves the BigQuery REST API.
// 4. On SIGINT/SIGTERM, shut down both cleanly.
package gateway
import (
"context"
"errors"
"fmt"
"log/slog"
"net/http"
"os"
"os/exec"
"os/signal"
"syscall"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/grpcserver"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
)
// engineReadyTimeout bounds how long Gateway.Run will wait for the engine
// subprocess's gRPC health service to report SERVING before giving up.
// 30s is generous: a debug build of the engine takes <1s to bind and
// flip to SERVING on a developer laptop, but CI cold-starts and
// container builds sometimes spend 5-10s in linker/loader before main()
// runs.
const engineReadyTimeout = 30 * time.Second
// Options configures the gateway.
type Options struct {
// HTTPAddress is the host:port the REST gateway listens on, e.g.
// "localhost:9050".
HTTPAddress string
// EngineAddress is the host:port of the internal C++ engine gRPC
// server, e.g. "localhost:9061". The Go gateway forwards SQL work
// and the bqstorage shim's engine client to this address.
EngineAddress string
// StorageGRPCAddress is the host:port where the gateway registers
// the public google.cloud.bigquery.storage.v1 BigQueryRead /
// BigQueryWrite services, e.g. "localhost:9060". Client libraries
// dial BIGQUERY_STORAGE_GRPC_ENDPOINT here.
StorageGRPCAddress string
// EngineBinary is the path to the C++ engine binary. If empty, the
// gateway runs without an engine (useful early on while the engine
// is still being scaffolded; queries will return Unimplemented).
EngineBinary string
// EngineArgs is the additional flag list passed to the engine
// subprocess after `--host_port`. Use this to forward
// `--data_dir` (and any future engine-level flags) from
// gateway-level CLI flags through to `emulator_main` without
// the gateway needing to know each flag's semantics.
EngineArgs []string
// CopyEngineStdout / CopyEngineStderr forward the engine subprocess's
// streams to the gateway's own streams.
CopyEngineStdout bool
CopyEngineStderr bool
// LogRequests prints each REST request and response.
LogRequests bool
// DefaultProjectID is the project clients are assumed to be acting
// against when seeding or other gateway-level operations need a
// fallback project. Mirrors `--project-id` on gateway_main.
DefaultProjectID string
// DefaultDatasetID is the server-level fallback dataset used to
// resolve unqualified table names when a query/job does not carry
// its own `defaultDataset`. Mirrors setting `default_dataset` on a
// production BigQuery client/job. Empty means no fallback (bare
// table names error, exactly like production with no default set).
// Mirrors `--dataset` on gateway_main.
DefaultDatasetID string
// DefaultDatasetLocation is the BigQuery location used as the
// fallback when a dataset is created without an explicit location
// (US, EU, regional). Mirrors `--default-dataset-location`.
DefaultDatasetLocation string
// EnableSeedAPI registers `POST /api/emulator/seed` and the
// matching `GET .../operations/{operationId}` endpoints so a
// caller can copy live production BigQuery metadata + rows into
// this emulator. Default false (off) for local safety.
EnableSeedAPI bool
// SeedAPIAllowRemote allows non-loopback callers to hit the seed
// API when true. When false (the default), seed routes refuse
// any request whose RemoteAddr is not loopback.
SeedAPIAllowRemote bool
// SeedAPISeedToken, when non-empty, requires matching header
// `X-BigQuery-Emulator-Seed-Token` on every seed API request.
// Loaded from `BIGQUERY_EMULATOR_SEED_TOKEN` when the flag is
// empty (see binaries/gateway_main).
SeedAPISeedToken string
// SeedFiles is the optional list of YAML seed-data file paths
// the gateway applies after the engine reports SERVING but
// before it starts accepting public traffic. See
// gateway/seedfile for the schema.
SeedFiles []string
// EnableSQLToolsAPI registers POST /api/emulator/sql/{format,parse,
// tokenize,complete} for downstream UIs. Off by default.
EnableSQLToolsAPI bool
// SQLToolsAPIAllowRemote allows non-loopback callers when true.
SQLToolsAPIAllowRemote bool
// SQLToolsAPISeedToken requires matching header
// X-BigQuery-Emulator-SqlTools-Token when non-empty.
SQLToolsAPISeedToken string
// DataDir is the persistent storage root the engine uses for
// the DuckDB catalog + table data. Mirrors `--data-dir`; the
// gateway passes it through via `--data_dir` in EngineArgs.
DataDir string
// InitialDataDir is an optional template directory the gateway
// copies into DataDir on startup when DataDir does not yet
// contain an initialized catalog (`catalog.duckdb` missing).
// Mirrors `--initial-data-dir` on gateway_main.
InitialDataDir string
// Debug enables verbose request and lifecycle logging.
Debug bool
// Logger is the structured logger the gateway emits lifecycle and
// request events to. When nil, the gateway logs to a discard
// handler so callers that want silent embedding (unit tests, the
// shallow-emulator harness) get zero output without having to
// build their own no-op logger. Production binaries (see
// binaries/gateway_main) wire a real *slog.Logger here so the
// emulator's structured logs surface in stderr / stackdriver.
Logger *slog.Logger
}
// Gateway is the top-level BigQuery emulator gateway.
type Gateway struct {
// opts is the configuration passed to New.
opts Options
// logger is opts.Logger, or a discard logger when that was nil.
logger *slog.Logger
// engine is the spawned engine subprocess; nil until startEngine has
// started one.
engine *exec.Cmd
// engineDone is closed by the goroutine that waits on the engine
// subprocess once the process has exited.
engineDone chan struct{}
// engineClient is the long-lived gRPC channel to the engine
// subprocess. nil when EngineBinary is empty (gateway-only stub mode).
engineClient *engine.Client
// preStartHook runs once just before the engine subprocess is
// spawned. Use it for filesystem prep that must complete before
// the engine touches DataDir (e.g. materializing a template tree
// into an empty data directory).
preStartHook func(Options) error
// postEngineHook runs once after the engine reports SERVING but
// before the gateway begins serving HTTP traffic. Use it for
// startup-time seeding from YAML files that needs the
// CatalogClient to be reachable.
postEngineHook func(Options, *engine.Client) error
// storageGRPC is the public BigQuery Storage listener (nil when
// StorageGRPCAddress is empty).
storageGRPC *grpcserver.Server
}
// New constructs a Gateway from opts without starting anything; call Run to
// start it. When opts.Logger is nil the gateway logs to a discard handler.
func New(opts Options) *Gateway {
	gw := &Gateway{opts: opts, logger: opts.Logger}
	if gw.logger == nil {
		gw.logger = slog.New(slog.DiscardHandler)
	}
	return gw
}
// WithPreStartHook registers hook to be called once, synchronously on the
// Run goroutine, before the engine subprocess is spawned. If the hook
// returns an error, Run aborts without starting the engine. The receiver is
// returned so calls can be chained.
func (g *Gateway) WithPreStartHook(hook func(Options) error) *Gateway {
	gw := g
	gw.preStartHook = hook
	return gw
}
// WithPostEngineHook registers hook to be called once after the engine
// reports SERVING and before the HTTP gateway accepts traffic. The hook is
// handed the long-lived *engine.Client so it can mutate engine state (for
// example, apply YAML seed files). If the hook returns an error, Run stops
// the engine and aborts. The receiver is returned so calls can be chained.
func (g *Gateway) WithPostEngineHook(hook func(Options, *engine.Client) error) *Gateway {
	gw := g
	gw.postEngineHook = hook
	return gw
}
// Run starts the engine subprocess (if configured) and the HTTP server,
// then blocks until either terminates or a signal arrives.
//
// Startup order: pre-start hook, engine, post-engine hook, storage gRPC
// listener, HTTP server. A failure in the post-engine hook or the storage
// gRPC startup stops the engine before the error is returned.
func (g *Gateway) Run() error {
ctx := context.Background()
// Filesystem prep must finish before the engine is spawned.
if g.preStartHook != nil {
if err := g.preStartHook(g.opts); err != nil {
return fmt.Errorf("pre-start hook: %w", err)
}
}
if err := g.startEngine(ctx); err != nil {
return fmt.Errorf("start engine: %w", err)
}
// The engine is up at this point, so the hook may use the client; it
// runs before any listener is opened.
if g.postEngineHook != nil {
if err := g.postEngineHook(g.opts, g.engineClient); err != nil {
g.stopEngine()
return fmt.Errorf("post-engine hook: %w", err)
}
}
deps := handlers.BuildDependenciesWith(g.engineClient, handlers.DepsOptions{
DataDir: g.opts.DataDir,
DefaultDatasetID: g.opts.DefaultDatasetID,
})
if err := g.startStorageGRPC(ctx, deps); err != nil {
g.stopEngine()
return fmt.Errorf("start storage grpc: %w", err)
}
srv := &http.Server{
Addr: g.opts.HTTPAddress,
Handler: NewServer(g.opts, deps, g.engineClient),
ReadHeaderTimeout: 10 * time.Second,
}
// Buffered so the serving goroutine can always deliver its result and
// exit, even if waitForShutdown returned via the signal path.
errCh := make(chan error, 1)
go func() {
g.logStartupExpectations(ctx)
err := srv.ListenAndServe()
// http.ErrServerClosed is the normal result of Shutdown, not a failure.
if err != nil && !errors.Is(err, http.ErrServerClosed) {
errCh <- err
return
}
errCh <- nil
}()
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
return g.waitForShutdown(ctx, srv, errCh, sigCh)
}
// startEngine spawns the C++ engine subprocess if one is configured and
// waits for it to come up. It is a no-op when EngineBinary is empty.
//
// The subprocess is started with `--host_port <EngineAddress>` followed by
// EngineArgs. Its stdout/stderr are forwarded to the gateway's own streams
// only when the matching CopyEngine* option is set. A background goroutine
// waits on the process and closes g.engineDone once it has exited.
//
// If the process starts but the readiness wait fails, the subprocess is
// stopped again before the error is returned, so a failed startup does not
// leave an orphaned engine behind.
func (g *Gateway) startEngine(ctx context.Context) error {
	if g.opts.EngineBinary == "" {
		return nil
	}
	args := []string{
		"--host_port", g.opts.EngineAddress,
	}
	args = append(args, g.opts.EngineArgs...)
	// #nosec G204 -- engine binary path is operator-supplied via
	// --engine_binary.
	cmd := exec.Command(g.opts.EngineBinary, args...)
	if g.opts.CopyEngineStdout {
		cmd.Stdout = os.Stdout
	}
	if g.opts.CopyEngineStderr {
		cmd.Stderr = os.Stderr
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("start %s: %w", g.opts.EngineBinary, err)
	}
	g.engine = cmd
	g.engineDone = make(chan struct{})
	go func() {
		err := cmd.Wait()
		// Signal the exit first so stopEngine never blocks on logging.
		close(g.engineDone)
		if err != nil {
			g.logger.WarnContext(ctx, "engine subprocess exited",
				slog.Any("err", err))
		}
	}()
	if err := g.connectAndWaitForEngine(ctx); err != nil {
		// Run returns straight away on a startEngine error without calling
		// stopEngine, so the child has to be torn down here.
		g.stopEngine()
		return err
	}
	return nil
}
// connectAndWaitForEngine dials the engine's gRPC address and blocks in
// WaitForReady until the engine is ready or engineReadyTimeout elapses, so
// the gateway's HTTP listener never accepts traffic before the engine can
// answer it.
//
// On success the live *engine.Client is stored on the receiver for the
// lifetime of the gateway; stopEngine closes it. On failure the client is
// closed and nothing is stored.
func (g *Gateway) connectAndWaitForEngine(ctx context.Context) error {
	addr := g.opts.EngineAddress

	conn, dialErr := engine.Dial(addr)
	if dialErr != nil {
		return fmt.Errorf("dial engine at %s: %w", addr, dialErr)
	}

	waitCtx, stopWaiting := context.WithTimeout(ctx, engineReadyTimeout)
	defer stopWaiting()
	if waitErr := conn.WaitForReady(waitCtx); waitErr != nil {
		_ = conn.Close()
		return fmt.Errorf("wait for engine ready at %s: %w", addr, waitErr)
	}

	g.engineClient = conn
	g.logger.InfoContext(ctx, "engine grpc serving", slog.String("addr", addr))
	return nil
}
// stopStorageGRPC closes the public storage gRPC server, if one was started,
// and clears the reference so repeated calls are no-ops. The close error is
// deliberately ignored.
func (g *Gateway) stopStorageGRPC() {
	server := g.storageGRPC
	if server == nil {
		return
	}
	_ = server.Close()
	g.storageGRPC = nil
}
// stopEngine closes the engine gRPC client (if any) and then shuts the
// engine subprocess down: it sends an interrupt, waits up to five seconds
// for the process to exit, and kills it if it has not. It returns only after
// the process has been reaped. It is a no-op for the process part when no
// engine was started.
func (g *Gateway) stopEngine() {
	if client := g.engineClient; client != nil {
		_ = client.Close()
		g.engineClient = nil
	}

	if g.engine == nil {
		return
	}
	proc := g.engine.Process
	if proc == nil {
		return
	}

	_ = proc.Signal(os.Interrupt)

	grace := time.NewTimer(5 * time.Second)
	defer grace.Stop()
	select {
	case <-g.engineDone:
		return
	case <-grace.C:
	}

	// The grace period expired; force the exit and wait for the reaper.
	_ = proc.Kill()
	<-g.engineDone
}
package gateway
import (
"context"
"log/slog"
"net/http"
"os"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/grpcserver"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
)
// startStorageGRPC binds the public storage gRPC listener on
// StorageGRPCAddress and serves it on a background goroutine. It is a no-op
// when no address is configured. A bind failure is returned; a later Serve
// failure is only logged.
func (g *Gateway) startStorageGRPC(ctx context.Context, deps handlers.Dependencies) error {
	addr := g.opts.StorageGRPCAddress
	if addr == "" {
		return nil
	}

	server, startErr := grpcserver.Start(addr, g.engineClient, deps)
	if startErr != nil {
		return startErr
	}
	g.storageGRPC = server

	go func() {
		serveErr := server.Serve()
		if serveErr == nil {
			return
		}
		g.logger.WarnContext(ctx, "storage grpc server exited", slog.Any("err", serveErr))
	}()

	g.logger.InfoContext(ctx, "storage grpc listening",
		slog.String("addr", addr))
	return nil
}
// waitForShutdown blocks until the HTTP server goroutine reports a result on
// errCh or a signal arrives on sigCh, then stops the storage gRPC server and
// the engine.
//
// When the HTTP server ended on its own, its result is returned. On a
// signal, the HTTP server is first given up to five seconds to shut down
// gracefully and nil is returned.
func (g *Gateway) waitForShutdown(
	ctx context.Context,
	srv *http.Server,
	errCh <-chan error,
	sigCh <-chan os.Signal,
) error {
	var result error
	select {
	case result = <-errCh:
	case sig := <-sigCh:
		g.logger.InfoContext(ctx, "shutting down on signal",
			slog.String("signal", sig.String()))
		shutdownCtx, release := context.WithTimeout(ctx, 5*time.Second)
		_ = srv.Shutdown(shutdownCtx)
		release()
	}

	g.stopStorageGRPC()
	g.stopEngine()
	return result
}
// logStartupExpectations logs the addresses the gateway is configured with:
// always the HTTP address; with an engine binary configured, the engine gRPC
// address and (when set) the public storage gRPC address; without one, a
// note that query routes will return Unimplemented.
func (g *Gateway) logStartupExpectations(ctx context.Context) {
	lg := g.logger
	lg.InfoContext(ctx, "gateway listening",
		slog.String("addr", g.opts.HTTPAddress))

	if g.opts.EngineBinary == "" {
		lg.InfoContext(ctx, "engine subprocess disabled; query routes will return Unimplemented")
		return
	}

	lg.InfoContext(ctx, "engine grpc expected",
		slog.String("addr", g.opts.EngineAddress))
	if g.opts.StorageGRPCAddress == "" {
		return
	}
	lg.InfoContext(ctx, "public storage grpc expected",
		slog.String("addr", g.opts.StorageGRPCAddress))
}
package grpcserver
import (
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"github.com/vantaboard/bigquery-emulator/gateway/handlers/bqanalyticshub"
"github.com/vantaboard/bigquery-emulator/gateway/handlers/bqconnection"
"github.com/vantaboard/bigquery-emulator/gateway/handlers/bqreservation"
"github.com/vantaboard/bigquery-emulator/gateway/handlers/bqstorage"
"github.com/vantaboard/bigquery-emulator/gateway/handlers/bqv2grpc"
"google.golang.org/grpc"
)
// RegisterAll registers every public gRPC surface the gateway exposes on the
// storage listener with srv: BigQuery Storage Read/Write, Connection,
// Reservation, Analytics Hub, and the BigQuery v2 resource services. A nil
// registrar makes it a no-op.
func RegisterAll(srv grpc.ServiceRegistrar, eng *engine.Client, deps handlers.Dependencies) {
	if srv != nil {
		bqstorage.RegisterGRPC(srv, eng)
		bqconnection.RegisterGRPC(srv, deps)
		bqreservation.RegisterGRPC(srv)
		bqanalyticshub.RegisterGRPC(srv)
		bqv2grpc.RegisterGRPC(srv, deps)
	}
}
// Package grpcserver hosts the public BigQuery Storage gRPC surface on the
// gateway process. Official client libraries dial
// google.cloud.bigquery.storage.v1.BigQueryRead / BigQueryWrite; the shim
// in gateway/handlers/bqstorage adapts those RPCs to the engine's internal
// bigquery_emulator.v1.StorageRead / StorageWrite contracts.
package grpcserver
import (
"errors"
"fmt"
"net"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"
)
// Server wraps the public Storage gRPC listener the gateway owns.
type Server struct {
// srv is the gRPC server the public services are registered on.
srv *grpc.Server
// lis is the TCP listener bound by Start and handed to srv in Serve.
lis net.Listener
}
// Start binds address and registers every public gRPC surface the gateway
// exposes (Storage, Connection, Reservation, Analytics Hub, BigQuery v2).
// eng may be nil in gateway-only mode; storage RPCs then return UNAVAILABLE.
//
// The returned Server is bound but not yet serving; call Serve to accept
// connections. An empty address or a failed bind is reported as an error.
func Start(address string, eng *engine.Client, deps handlers.Dependencies) (*Server, error) {
	if address == "" {
		return nil, errors.New("grpcserver: empty address")
	}

	listener, listenErr := net.Listen("tcp", address)
	if listenErr != nil {
		return nil, fmt.Errorf("grpcserver: listen %s: %w", address, listenErr)
	}

	server := &Server{
		srv: grpc.NewServer(grpc.Creds(insecure.NewCredentials())),
		lis: listener,
	}
	RegisterAll(server.srv, eng, deps)
	return server, nil
}
// Serve accepts connections on the bound listener and blocks until the
// server stops or the listener fails. Calling it on a nil or partially
// initialized Server returns an error instead of panicking.
func (s *Server) Serve() error {
	initialized := s != nil && s.srv != nil && s.lis != nil
	if !initialized {
		return errors.New("grpcserver: server not initialized")
	}
	return s.srv.Serve(s.lis)
}
// Stop gracefully shuts down the gRPC server via GracefulStop. It is a no-op
// on a nil Server or one without an underlying gRPC server.
func (s *Server) Stop() {
	if s != nil && s.srv != nil {
		s.srv.GracefulStop()
	}
}
// Close stops the server and closes the listener. It is safe to call on a
// nil Server.
//
// Stopping the gRPC server already closes a listener that was handed to
// Serve, so the explicit close below then fails with net.ErrClosed. That
// outcome is the expected one and is reported as success; the explicit
// close still matters when Serve was never called. Any other listener error
// is returned.
func (s *Server) Close() error {
	if s == nil {
		return nil
	}
	s.Stop()
	if s.lis == nil {
		return nil
	}
	if err := s.lis.Close(); err != nil && !errors.Is(err, net.ErrClosed) {
		return err
	}
	return nil
}
package handlers
import (
"net/http"
"regexp"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
)
// abortSessionRE matches `CALL BQ.ABORT_SESSION([session_id])` system
// procedure calls bigframes issues when closing a session.
//
// The pattern is case-insensitive and must match the whole statement. The
// argument list is optional; when present it may be empty, a single-quoted
// literal (captured as group 1), or a `SESSION_ID()` call. A trailing
// semicolon and surrounding whitespace are allowed.
var abortSessionRE = regexp.MustCompile(
`(?is)^\s*CALL\s+BQ\.ABORT_SESSION\s*(?:\(\s*(?:'([^']*)'|SESSION_ID\s*\(\s*\))?\s*\))?\s*;?\s*$`)
// parseAbortSessionSQL reports whether sql, after trimming surrounding
// whitespace, is a BQ.ABORT_SESSION call as recognized by abortSessionRE.
func parseAbortSessionSQL(sql string) bool {
	statement := strings.TrimSpace(sql)
	return abortSessionRE.MatchString(statement)
}
// handleAbortSessionQuery answers a BQ.ABORT_SESSION call with an empty,
// already-completed query job so bigframes session teardown succeeds against
// the emulator. The job's start and end times are the same instant, the
// resolved session info is stamped onto the job, and the response is written
// as JSON with HTTP 200.
func handleAbortSessionQuery(
	deps Dependencies,
	w http.ResponseWriter,
	projectID, location string,
	connProps []bqtypes.ConnectionProperty,
) {
	now := time.Now().UTC()
	info := sessionStore(&deps).Resolve(projectID, location, false, connProps)

	completed := deps.Jobs.CompleteQueryWithResult(projectID, location, 0, now, now, &jobs.QueryResult{})
	stampJobSessionInfo(completed, info)

	response := assembleQueryResponse(completed, nil, nil, nil, nil, "", "", nil, nil, info)
	writeJSON(w, http.StatusOK, response)
}
package bqanalyticshub
import (
"context"
"sync"
"cloud.google.com/go/bigquery/analyticshub/apiv1/analyticshubpb"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/emptypb"
)
// Server implements the Analytics Hub gRPC surface with in-memory storage.
// RPCs it does not define are served by the embedded
// UnimplementedAnalyticsHubServiceServer.
type Server struct {
analyticshubpb.UnimplementedAnalyticsHubServiceServer
// exchanges is keyed by the full data exchange resource name.
exchanges sync.Map // name string -> *analyticshubpb.DataExchange
// listings is keyed by the full listing resource name
// (<exchange name>/listings/<listing id>).
listings sync.Map // name string -> *analyticshubpb.Listing
}
// RegisterGRPC registers a fresh, empty in-memory AnalyticsHubService
// implementation on srv. A nil registrar makes it a no-op.
func RegisterGRPC(srv grpc.ServiceRegistrar) {
	if srv != nil {
		analyticshubpb.RegisterAnalyticsHubServiceServer(srv, &Server{})
	}
}
// CreateDataExchange stores a new data exchange named
// <parent>/dataExchanges/<data_exchange_id>, copying only the display name
// and description from the request body.
//
// It returns InvalidArgument when the request, parent, or id is missing and
// AlreadyExists when the name is taken.
func (s *Server) CreateDataExchange(
	_ context.Context,
	req *analyticshubpb.CreateDataExchangeRequest,
) (*analyticshubpb.DataExchange, error) {
	if req == nil {
		return nil, status.Error(codes.InvalidArgument, "parent and data_exchange_id are required")
	}
	parent, exchangeID := req.GetParent(), req.GetDataExchangeId()
	if parent == "" || exchangeID == "" {
		return nil, status.Error(codes.InvalidArgument, "parent and data_exchange_id are required")
	}

	fullName := parent + "/dataExchanges/" + exchangeID
	created := &analyticshubpb.DataExchange{Name: fullName}
	if body := req.GetDataExchange(); body != nil {
		created.DisplayName = body.GetDisplayName()
		created.Description = body.GetDescription()
	}

	// LoadOrStore makes the existence check and the insert one atomic step.
	_, exists := s.exchanges.LoadOrStore(fullName, created)
	if exists {
		return nil, status.Errorf(codes.AlreadyExists, "DataExchange %s already exists", fullName)
	}
	return created, nil
}
// GetDataExchange looks up a stored data exchange by its full resource name.
// It returns InvalidArgument when the name is missing and NotFound when no
// exchange is stored under it.
func (s *Server) GetDataExchange(
	_ context.Context,
	req *analyticshubpb.GetDataExchangeRequest,
) (*analyticshubpb.DataExchange, error) {
	if req == nil || req.GetName() == "" {
		return nil, status.Error(codes.InvalidArgument, "name is required")
	}

	target := req.GetName()
	stored, found := s.exchanges.Load(target)
	if !found {
		return nil, status.Errorf(codes.NotFound, "DataExchange %s not found", target)
	}

	exchange, _ := stored.(*analyticshubpb.DataExchange)
	return exchange, nil
}
// DeleteDataExchange removes a data exchange and every listing whose name
// starts with "<name>/listings/".
//
// It returns InvalidArgument when the request or its name is missing and
// NotFound when the exchange does not exist.
func (s *Server) DeleteDataExchange(
	_ context.Context,
	req *analyticshubpb.DeleteDataExchangeRequest,
) (*emptypb.Empty, error) {
	var name string
	if req != nil {
		name = req.GetName()
	}
	if name == "" {
		return nil, status.Error(codes.InvalidArgument, "name is required")
	}
	if _, existed := s.exchanges.LoadAndDelete(name); !existed {
		return nil, status.Errorf(codes.NotFound, "DataExchange %s not found", name)
	}

	listingPrefix := name + "/listings/"
	s.listings.Range(func(k, _ any) bool {
		key, isString := k.(string)
		// A key must be strictly longer than the prefix to name a listing.
		if !isString || len(key) <= len(listingPrefix) {
			return true
		}
		if key[:len(listingPrefix)] == listingPrefix {
			s.listings.Delete(key)
		}
		return true
	})
	return &emptypb.Empty{}, nil
}
// CreateListing stores a new listing named "<parent>/listings/<listing_id>".
//
// The parent must be an existing data exchange. Display name, description and
// source are copied from the request payload. It returns InvalidArgument for a
// missing request, parent or id, NotFound when the parent exchange is unknown,
// and AlreadyExists when the listing name is taken.
func (s *Server) CreateListing(
	_ context.Context,
	req *analyticshubpb.CreateListingRequest,
) (*analyticshubpb.Listing, error) {
	var parent, id string
	if req != nil {
		parent, id = req.GetParent(), req.GetListingId()
	}
	if parent == "" || id == "" {
		return nil, status.Error(codes.InvalidArgument, "parent and listing_id are required")
	}
	if _, parentExists := s.exchanges.Load(parent); !parentExists {
		return nil, status.Errorf(codes.NotFound, "DataExchange %s not found", parent)
	}

	fullName := parent + "/listings/" + id
	spec := req.GetListing()
	if spec == nil {
		spec = new(analyticshubpb.Listing)
	}
	created := &analyticshubpb.Listing{
		Name:        fullName,
		DisplayName: spec.GetDisplayName(),
		Description: spec.GetDescription(),
		Source:      spec.GetSource(),
	}

	if _, exists := s.listings.LoadOrStore(fullName, created); exists {
		return nil, status.Errorf(codes.AlreadyExists, "Listing %s already exists", fullName)
	}
	return created, nil
}
package bqconnection
import (
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
)
// FixtureAnnotation is stored on Connection.description when created in
// fixture mode so EXTERNAL_QUERY resolution can locate snapshot data.
// AnnotateFixtureDescription adds it as a space-separated prefix of the
// description, or as the entire description when that is empty.
const FixtureAnnotation = "bqemu:fixture"
// ModeForConnection reports the source mode configured for the connection
// resource called name. With no configuration (cfg == nil) every connection
// is treated as fixture mode.
func ModeForConnection(cfg *sourceconfig.Config, name string) sourceconfig.Mode {
	if cfg != nil {
		return cfg.ResolveConnection(name)
	}
	return sourceconfig.ModeFixture
}
// AnnotateFixtureDescription returns description carrying FixtureAnnotation
// when the connection called name resolves to fixture mode.
//
// The description is returned untouched when the connection is not in fixture
// mode or already contains the annotation. An empty description becomes the
// annotation itself; otherwise the annotation is prepended with one space.
func AnnotateFixtureDescription(cfg *sourceconfig.Config, name, description string) string {
	switch {
	case ModeForConnection(cfg, name) != sourceconfig.ModeFixture:
		return description
	case strings.Contains(description, FixtureAnnotation):
		return description
	case description == "":
		return FixtureAnnotation
	default:
		return FixtureAnnotation + " " + description
	}
}
package bqconnection
import (
"context"
"strings"
"cloud.google.com/go/bigquery/connection/apiv1/connectionpb"
"github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/emptypb"
"google.golang.org/protobuf/types/known/fieldmaskpb"
)
// Server implements the BigQuery Connection API gRPC surface.
//
// It embeds UnimplementedConnectionServiceServer, so RPCs without a method
// defined on Server fall through to the generated stub.
type Server struct {
connectionpb.UnimplementedConnectionServiceServer
// store holds the connection records served by the RPC handlers.
store *Store
// cfg is passed to AnnotateFixtureDescription when a connection is created.
cfg *sourceconfig.Config
}
// RegisterGRPC opens the connection store described by deps.ExternalSources
// and attaches a Server backed by it to srv.
//
// A nil registrar is ignored. If the store cannot be opened the service is
// not registered and the error is dropped, because this function has no way
// to report it.
func RegisterGRPC(srv grpc.ServiceRegistrar, deps handlers.Dependencies) {
	if srv == nil {
		return
	}
	store, err := OpenStore(deps.ExternalSources)
	if err == nil {
		connectionpb.RegisterConnectionServiceServer(srv, &Server{store: store, cfg: deps.ExternalSources})
	}
}
// ListConnections returns every stored connection under req.Parent in a
// single response. It returns InvalidArgument when the request or its parent
// is missing.
func (s *Server) ListConnections(
	_ context.Context,
	req *connectionpb.ListConnectionsRequest,
) (*connectionpb.ListConnectionsResponse, error) {
	var parent string
	if req != nil {
		parent = req.GetParent()
	}
	if parent == "" {
		return nil, status.Error(codes.InvalidArgument, "parent is required")
	}
	resp := &connectionpb.ListConnectionsResponse{}
	resp.Connections = s.store.List(parent)
	return resp, nil
}
// CreateConnection stores a new connection named
// "<parent>/connections/<connection_id>" and returns the stored record.
//
// The record starts as a clone of the request payload; its name is set, the
// description is passed through AnnotateFixtureDescription, and the properties
// oneof is re-pointed at the request's value. It returns InvalidArgument for a
// missing request, parent or id, AlreadyExists when the name is taken, and
// Internal when cloning or persisting fails.
func (s *Server) CreateConnection(
	_ context.Context,
	req *connectionpb.CreateConnectionRequest,
) (*connectionpb.Connection, error) {
	var parent, id string
	if req != nil {
		parent, id = req.GetParent(), req.GetConnectionId()
	}
	if parent == "" || id == "" {
		return nil, status.Error(codes.InvalidArgument, "parent and connection_id are required")
	}

	fullName := parent + "/connections/" + id
	if _, taken := s.store.Get(fullName); taken {
		return nil, status.Errorf(codes.AlreadyExists, "Connection %s already exists", fullName)
	}

	spec := req.GetConnection()
	if spec == nil {
		spec = new(connectionpb.Connection)
	}
	record, cloneErr := CloneConnection(spec)
	if cloneErr != nil {
		return nil, status.Errorf(codes.Internal, "clone connection: %v", cloneErr)
	}
	record.Name = fullName
	record.FriendlyName = spec.GetFriendlyName()
	record.Description = AnnotateFixtureDescription(s.cfg, fullName, spec.GetDescription())
	copyConnectionProperties(record, spec)

	if putErr := s.store.Put(record); putErr != nil {
		return nil, status.Errorf(codes.Internal, "persist connection: %v", putErr)
	}
	return record, nil
}
// GetConnection looks up a connection by its full resource name. It returns
// InvalidArgument when the request or its name is missing and NotFound when
// the store has no such record.
func (s *Server) GetConnection(
	_ context.Context,
	req *connectionpb.GetConnectionRequest,
) (*connectionpb.Connection, error) {
	var name string
	if req != nil {
		name = req.GetName()
	}
	if name == "" {
		return nil, status.Error(codes.InvalidArgument, "name is required")
	}
	if record, found := s.store.Get(name); found {
		return record, nil
	}
	return nil, status.Errorf(codes.NotFound, "Connection %s not found", name)
}
// UpdateConnection applies req.Connection to a clone of the stored record,
// honouring req.UpdateMask, then persists and returns the clone.
//
// It returns InvalidArgument when the request, its name, or the patch is
// missing, NotFound when the connection does not exist, and Internal when
// cloning or persisting fails.
//
// NOTE(review): the description is not passed through
// AnnotateFixtureDescription again here, so an update may replace an
// annotated description. Confirm this is intended.
func (s *Server) UpdateConnection(
	_ context.Context,
	req *connectionpb.UpdateConnectionRequest,
) (*connectionpb.Connection, error) {
	var name string
	if req != nil {
		name = req.GetName()
	}
	if name == "" {
		return nil, status.Error(codes.InvalidArgument, "name is required")
	}

	current, found := s.store.Get(name)
	if !found {
		return nil, status.Errorf(codes.NotFound, "Connection %s not found", name)
	}
	changes := req.GetConnection()
	if changes == nil {
		return nil, status.Error(codes.InvalidArgument, "connection is required")
	}

	// Mutate a copy so the stored record is only replaced by Put.
	updated, cloneErr := CloneConnection(current)
	if cloneErr != nil {
		return nil, status.Errorf(codes.Internal, "clone connection: %v", cloneErr)
	}
	applyConnectionUpdateMask(updated, changes, req.GetUpdateMask())

	if putErr := s.store.Put(updated); putErr != nil {
		return nil, status.Errorf(codes.Internal, "persist connection: %v", putErr)
	}
	return updated, nil
}
// DeleteConnection removes the named connection from the store. It returns
// InvalidArgument when the request or its name is missing, NotFound when the
// connection does not exist, and Internal when the store fails to delete it.
func (s *Server) DeleteConnection(
	_ context.Context,
	req *connectionpb.DeleteConnectionRequest,
) (*emptypb.Empty, error) {
	var name string
	if req != nil {
		name = req.GetName()
	}
	if name == "" {
		return nil, status.Error(codes.InvalidArgument, "name is required")
	}
	if _, found := s.store.Get(name); !found {
		return nil, status.Errorf(codes.NotFound, "Connection %s not found", name)
	}
	if delErr := s.store.Delete(name); delErr != nil {
		return nil, status.Errorf(codes.Internal, "delete connection: %v", delErr)
	}
	return &emptypb.Empty{}, nil
}
// applyConnectionUpdateMask copies fields from patch into dst according to
// mask. It is a no-op when dst or patch is nil.
//
// With an empty (or nil) mask the update is a merge: non-empty FriendlyName
// and Description overwrite dst, and any properties variant set on patch
// replaces dst's.
//
// With a non-empty mask each path is trimmed and lower-cased, so both
// snake_case and camelCase spellings match. "friendly_name" and "description"
// are assigned unconditionally, which lets a masked update clear them. A
// properties path replaces dst.Properties only when patch carries that
// variant. Unrecognised paths are ignored.
func applyConnectionUpdateMask(dst, patch *connectionpb.Connection, mask *fieldmaskpb.FieldMask) {
	if dst == nil || patch == nil {
		return
	}
	paths := mask.GetPaths()
	if len(paths) == 0 {
		if patch.FriendlyName != "" {
			dst.FriendlyName = patch.FriendlyName
		}
		if patch.Description != "" {
			dst.Description = patch.Description
		}
		copyConnectionProperties(dst, patch)
		return
	}
	for _, p := range paths {
		switch strings.TrimSpace(strings.ToLower(p)) {
		case "friendly_name", "friendlyname":
			dst.FriendlyName = patch.FriendlyName
		case "description":
			dst.Description = patch.Description
		case "cloud_sql", "cloudsql":
			if patch.GetCloudSql() != nil {
				dst.Properties = &connectionpb.Connection_CloudSql{CloudSql: patch.GetCloudSql()}
			}
		case "cloud_spanner", "cloudspanner":
			if patch.GetCloudSpanner() != nil {
				dst.Properties = &connectionpb.Connection_CloudSpanner{CloudSpanner: patch.GetCloudSpanner()}
			}
		case "aws":
			if patch.GetAws() != nil {
				dst.Properties = &connectionpb.Connection_Aws{Aws: patch.GetAws()}
			}
		case "azure":
			if patch.GetAzure() != nil {
				dst.Properties = &connectionpb.Connection_Azure{Azure: patch.GetAzure()}
			}
		case "cloud_resource", "cloudresource":
			if patch.GetCloudResource() != nil {
				dst.Properties = &connectionpb.Connection_CloudResource{CloudResource: patch.GetCloudResource()}
			}
		case "spark":
			if patch.GetSpark() != nil {
				dst.Properties = &connectionpb.Connection_Spark{Spark: patch.GetSpark()}
			}
		case "salesforce_data_cloud", "salesforcedatacloud":
			// Kept in step with copyConnectionProperties, which already
			// handles this variant on the unmasked path.
			if patch.GetSalesforceDataCloud() != nil {
				dst.Properties = &connectionpb.Connection_SalesforceDataCloud{
					SalesforceDataCloud: patch.GetSalesforceDataCloud(),
				}
			}
		}
	}
}
// copyConnectionProperties points dst's properties oneof at the same variant
// and payload that src carries. A fresh oneof wrapper is allocated, but the
// inner properties message is shared with src, not copied.
//
// dst is left untouched when either argument is nil, when src has no
// properties set, or when src carries a variant not listed here.
func copyConnectionProperties(dst, src *connectionpb.Connection) {
	if src == nil || dst == nil {
		return
	}
	switch variant := src.Properties.(type) {
	case *connectionpb.Connection_CloudSql:
		dst.Properties = &connectionpb.Connection_CloudSql{CloudSql: variant.CloudSql}
	case *connectionpb.Connection_CloudSpanner:
		dst.Properties = &connectionpb.Connection_CloudSpanner{CloudSpanner: variant.CloudSpanner}
	case *connectionpb.Connection_CloudResource:
		dst.Properties = &connectionpb.Connection_CloudResource{CloudResource: variant.CloudResource}
	case *connectionpb.Connection_Aws:
		dst.Properties = &connectionpb.Connection_Aws{Aws: variant.Aws}
	case *connectionpb.Connection_Azure:
		dst.Properties = &connectionpb.Connection_Azure{Azure: variant.Azure}
	case *connectionpb.Connection_Spark:
		dst.Properties = &connectionpb.Connection_Spark{Spark: variant.Spark}
	case *connectionpb.Connection_SalesforceDataCloud:
		dst.Properties = &connectionpb.Connection_SalesforceDataCloud{
			SalesforceDataCloud: variant.SalesforceDataCloud,
		}
	}
}
// Package bqconnection is the shallow-emulator skeleton for the
// BigQuery Connection API surface (gRPC, exposed at the storage gRPC
// port per docker-compose.yml). The gRPC layer is intentionally NOT
// registered in this skeleton because doing so would require:
//
// 1. Adding `cloud.google.com/go/bigquery/connection/apiv1/connectionpb`
// and the associated `cloud.google.com/go/iam/apiv1/iampb` Go
// dependencies, which transitively pull ~30 packages this repo
// does not currently link.
// 2. Building a connection-record storage layer (this repo's
// `backend/catalog/` is C++ and does not yet model connection
// records).
//
// Both are explicitly larger than the shallow-emulator port budget
// per `docs/ENGINE_POLICY.md`.
// The surface-mapping table below documents which failing-IT each
// intended handler symbol satisfies, so follow-up ports use a
// one-to-one mapping rather than a free-form rebuild.
//
// Failing-IT → intended handler mapping (shallow-emulator intake table):
//
// CreateAwsConnectionIT → connectionpb.ConnectionService.CreateConnection
// ⇒ gateway/handlers/bqconnection/server.go: CreateConnection
// ⇒ gateway/handlers/bqconnection/rest_handler.go (HTTP/JSON variant)
// ⇒ gateway/handlers/bqconnection/connection_properties.go: applyCloudSQLFromCreate,
// validateConnectionPropertiesOneof
//
// DeleteConnectionIT → connectionpb.ConnectionService.DeleteConnection
// ⇒ gateway/handlers/bqconnection/server.go: DeleteConnection
// GetConnectionIT → connectionpb.ConnectionService.GetConnection
// ⇒ gateway/handlers/bqconnection/server.go: GetConnection
// ShareConnectionIT → connectionpb.ConnectionService.{GetIamPolicy,SetIamPolicy}
// ⇒ gateway/handlers/bqconnection/server.go: {GetIamPolicy,SetIamPolicy}
// (currently UNIMPLEMENTED — IT will fail-fast)
// UpdateConnectionIT → connectionpb.ConnectionService.UpdateConnection
// ⇒ gateway/handlers/bqconnection/server.go: UpdateConnection
// ⇒ gateway/handlers/bqconnection/connection_mask_paths.go: applyConnectionUpdateMask
// ⇒ gateway/handlers/bqconnection/connection_update.go: per-field setters
//
// Storage adapter shim (deferred): connection-record helpers
// (GetConnectionRecord, PutConnectionRecord, ListConnectionRecords,
// DeleteConnectionRecord, IsNotFound) map onto
// this repo's `backend/storage/` once a connections table lands. The
// initial cut should keep them in-process (a `sync.Map`-backed store
// is fine for the live-IT track) and add a SQLite-backed
// implementation only when persistence becomes necessary.
package bqconnection
import (
"net/http"
)
// Register is a placeholder hook for mounting Connection API routes on an
// HTTP mux. It currently ignores its argument and registers nothing; REST
// probes are expected to be routed to NotImplementedHTTP by the gateway.
func Register(_ *http.ServeMux) {
	// Intentionally empty.
}
// NotImplementedHTTP answers any request with HTTP 501 and a fixed JSON error
// document (reason "notImplemented", domain "global"). The request itself is
// not inspected. The payload is embedded here rather than built by a shared
// helper so the package stays self-contained.
func NotImplementedHTTP(w http.ResponseWriter, _ *http.Request) {
	const payload = `{"error":{"code":501,"message":"BigQuery Connection API is not yet implemented by the emulator. See docs/ENGINE_POLICY.md and ROADMAP.md.","status":"notImplemented","errors":[{"reason":"notImplemented","message":"BigQuery Connection API is not yet implemented by the emulator.","domain":"global"}]}}`

	// Headers must be set before WriteHeader for them to be sent.
	headers := w.Header()
	headers.Set("Content-Type", "application/json; charset=utf-8")
	w.WriteHeader(http.StatusNotImplemented)

	// The write error is deliberately dropped: the status line is already sent.
	_, _ = w.Write([]byte(payload))
}
package bqconnection
import (
"encoding/json"
"errors"
"os"
"path/filepath"
"sync"
"cloud.google.com/go/bigquery/connection/apiv1/connectionpb"
"github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
"google.golang.org/protobuf/encoding/protojson"
)
// registryDir is the subdirectory of the connection fixture root in which
// OpenStore places the registry file.
const registryDir = "_registry"
// Store persists Connection records under $data_dir/external/connections/_registry/.
//
// Records are held in memory and, when path is set, written back to a single
// JSON file after each Put or Delete.
type Store struct {
// path is the registry file location; when empty, load and persist are no-ops.
path string
// mu guards byName.
mu sync.RWMutex
// byName indexes records by Connection.Name.
byName map[string]*connectionpb.Connection
}
// OpenStore builds the connection registry described by cfg.
//
// When cfg is nil or reports no connection fixture root, the returned store
// is memory-only. Otherwise the "_registry" directory under that root is
// created if needed and any existing "connections.json" in it is loaded.
// Directory-creation and load errors are returned unchanged.
func OpenStore(cfg *sourceconfig.Config) (*Store, error) {
	var root string
	if cfg != nil {
		root = cfg.ConnectionFixtureRoot()
	}

	store := &Store{byName: make(map[string]*connectionpb.Connection)}
	if root == "" {
		return store, nil
	}

	registry := filepath.Join(root, registryDir)
	if err := os.MkdirAll(registry, 0o750); err != nil {
		return nil, err
	}
	store.path = filepath.Join(registry, "connections.json")
	if err := store.load(); err != nil {
		return nil, err
	}
	return store, nil
}
// load populates byName from the registry file at s.path.
//
// It is a no-op for a nil store, a memory-only store (empty path), or a
// missing file. The file is a JSON envelope {"connections": [...]} whose
// elements are protojson-encoded Connection messages.
//
// Elements are decoded with protojson, not encoding/json: encoding/json
// cannot decode the interface-typed Properties oneof, which previously made
// any registry holding connection properties unreadable after a restart.
// Unknown keys are discarded so files written by the earlier
// encoding/json-based persist still load (minus the properties they could
// never restore).
//
// If the file cannot be decoded it is renamed to "<path>.corrupt" and the
// store starts empty; the decode error is returned only if that rename fails.
func (s *Store) load() error {
	if s == nil || s.path == "" {
		return nil
	}
	raw, err := os.ReadFile(s.path) //nolint:gosec // operator-controlled data dir
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil
		}
		return err
	}
	conns, err := decodeConnectionRegistry(raw)
	if err != nil {
		// Move the unreadable file aside so the next persist cannot be
		// confused with it and an operator can inspect it later.
		if renameErr := os.Rename(s.path, s.path+".corrupt"); renameErr != nil {
			return err
		}
		return nil
	}
	for _, c := range conns {
		if c == nil || c.Name == "" {
			continue
		}
		s.byName[c.Name] = c
	}
	return nil
}

// decodeConnectionRegistry parses a registry envelope into Connection
// messages. JSON null elements are skipped; any other element that is not a
// valid Connection fails the whole decode.
func decodeConnectionRegistry(raw []byte) ([]*connectionpb.Connection, error) {
	var envelope struct {
		Connections []json.RawMessage `json:"connections"`
	}
	if err := json.Unmarshal(raw, &envelope); err != nil {
		return nil, err
	}
	opts := protojson.UnmarshalOptions{DiscardUnknown: true}
	out := make([]*connectionpb.Connection, 0, len(envelope.Connections))
	for _, item := range envelope.Connections {
		if len(item) == 0 || string(item) == "null" {
			continue
		}
		c := &connectionpb.Connection{}
		if err := opts.Unmarshal(item, c); err != nil {
			return nil, err
		}
		out = append(out, c)
	}
	return out, nil
}

// persist writes every record to the registry file at s.path.
//
// It is a no-op for a nil or memory-only store. Each record is encoded with
// protojson so the properties oneof round-trips through load. The file is
// written to "<path>.tmp" and then renamed over the target, so a failed write
// leaves the previous registry in place.
func (s *Store) persist() error {
	if s == nil || s.path == "" {
		return nil
	}
	// Encode under the read lock so the snapshot is consistent; file I/O
	// happens after the lock is released.
	s.mu.RLock()
	items := make([]json.RawMessage, 0, len(s.byName))
	var marshalErr error
	for _, c := range s.byName {
		if c == nil {
			continue
		}
		encoded, err := protojson.Marshal(c)
		if err != nil {
			marshalErr = err
			break
		}
		items = append(items, encoded)
	}
	s.mu.RUnlock()
	if marshalErr != nil {
		return marshalErr
	}
	raw, err := json.MarshalIndent(struct {
		Connections []json.RawMessage `json:"connections"`
	}{Connections: items}, "", " ")
	if err != nil {
		return err
	}
	tmp := s.path + ".tmp"
	if err := os.WriteFile(tmp, raw, 0o600); err != nil {
		return err
	}
	return os.Rename(tmp, s.path)
}
// Put inserts conn under conn.Name, replacing any record already stored
// there, and then persists the registry. It returns an error when the store
// or conn is nil, when conn has no name, or when persisting fails; in the
// last case the in-memory map has already been updated.
func (s *Store) Put(conn *connectionpb.Connection) error {
	if s == nil || conn == nil || conn.Name == "" {
		return errors.New("connection name is required")
	}
	func() {
		s.mu.Lock()
		defer s.mu.Unlock()
		s.byName[conn.Name] = conn
	}()
	return s.persist()
}
// Get looks up the record stored under name. The boolean reports whether it
// was found; a nil store never finds anything. The returned pointer is the
// stored record itself, not a copy.
func (s *Store) Get(name string) (*connectionpb.Connection, bool) {
	if s == nil {
		return nil, false
	}
	s.mu.RLock()
	conn, found := s.byName[name]
	s.mu.RUnlock()
	return conn, found
}
// Delete drops the record stored under name, if any, and then persists the
// registry. It returns an error when the store is nil, when name is empty,
// or when persisting fails. Deleting an unknown name is not an error.
func (s *Store) Delete(name string) error {
	if s == nil || name == "" {
		return errors.New("connection name is required")
	}
	func() {
		s.mu.Lock()
		defer s.mu.Unlock()
		delete(s.byName, name)
	}()
	return s.persist()
}
// List returns the records whose name starts with "<parent>/connections/".
// An empty parent matches every record. The result is never nil for a
// non-nil store, and its order follows map iteration, so it is unspecified.
func (s *Store) List(parent string) []*connectionpb.Connection {
	if s == nil {
		return nil
	}
	wantPrefix := parent + "/connections/"
	matches := []*connectionpb.Connection{}

	s.mu.RLock()
	defer s.mu.RUnlock()
	for key, conn := range s.byName {
		if conn != nil && (parent == "" || hasParentPrefix(key, wantPrefix)) {
			matches = append(matches, conn)
		}
	}
	return matches
}
// hasParentPrefix reports whether name begins with prefix and has at least
// one further byte after it. A name equal to prefix does not match.
func hasParentPrefix(name, prefix string) bool {
	if len(name) <= len(prefix) {
		return false
	}
	return name[:len(prefix)] == prefix
}
// CloneConnection returns an independent copy of in, produced by encoding it
// to protojson and decoding the result into a new message. A nil input
// yields an empty Connection. Encoding and decoding errors are returned
// unchanged.
func CloneConnection(in *connectionpb.Connection) (*connectionpb.Connection, error) {
	clone := new(connectionpb.Connection)
	if in == nil {
		return clone, nil
	}
	encoded, err := protojson.Marshal(in)
	if err != nil {
		return nil, err
	}
	if err = protojson.Unmarshal(encoded, clone); err != nil {
		return nil, err
	}
	return clone, nil
}
package bqreservation
import (
"context"
"cloud.google.com/go/bigquery/reservation/apiv1/reservationpb"
"google.golang.org/grpc"
)
// Server implements the shallow BigQuery Reservation API gRPC surface.
//
// It holds no state. It embeds UnimplementedReservationServiceServer, so RPCs
// without a method defined on Server fall through to the generated stub.
type Server struct {
reservationpb.UnimplementedReservationServiceServer
}
// RegisterGRPC attaches a Server to srv as the ReservationService
// implementation. A nil registrar is ignored.
func RegisterGRPC(srv grpc.ServiceRegistrar) {
	if srv != nil {
		reservationpb.RegisterReservationServiceServer(srv, &Server{})
	}
}
// ListCapacityCommitments always succeeds with a response holding an empty,
// non-nil commitment list. The request is ignored.
func (s *Server) ListCapacityCommitments(
	_ context.Context,
	_ *reservationpb.ListCapacityCommitmentsRequest,
) (*reservationpb.ListCapacityCommitmentsResponse, error) {
	resp := new(reservationpb.ListCapacityCommitmentsResponse)
	resp.CapacityCommitments = []*reservationpb.CapacityCommitment{}
	return resp, nil
}
// ListReservations always succeeds with a response holding an empty,
// non-nil reservation list. The request is ignored.
func (s *Server) ListReservations(
	_ context.Context,
	_ *reservationpb.ListReservationsRequest,
) (*reservationpb.ListReservationsResponse, error) {
	resp := new(reservationpb.ListReservationsResponse)
	resp.Reservations = []*reservationpb.Reservation{}
	return resp, nil
}
package bqstorage
import (
"errors"
"fmt"
"strconv"
"strings"
"time"
"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
"github.com/apache/arrow/go/v18/arrow"
"github.com/apache/arrow/go/v18/arrow/array"
"github.com/apache/arrow/go/v18/arrow/memory"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// arrowSchemaFromEngine converts an engine table schema into an Arrow schema
// with one field per column, in the same order. A nil or field-less schema
// yields an empty Arrow schema. A column is nullable unless its mode,
// compared case-insensitively, equals bqModeRequired.
func arrowSchemaFromEngine(schema *enginepb.TableSchema) *arrow.Schema {
	var columns []*enginepb.FieldSchema
	if schema != nil {
		columns = schema.GetFields()
	}
	if len(columns) == 0 {
		return arrow.NewSchema(nil, nil)
	}

	arrowFields := make([]arrow.Field, len(columns))
	for i, col := range columns {
		arrowFields[i] = arrow.Field{
			Name:     col.GetName(),
			Type:     arrowTypeForBQ(col.GetType()),
			Nullable: strings.ToUpper(col.GetMode()) != bqModeRequired,
		}
	}
	return arrow.NewSchema(arrowFields, nil)
}
// arrowTypeForBQ maps a BigQuery type name (trimmed, case-insensitive) to the
// Arrow type used on the wire:
//
//   - INT64 / INTEGER   -> int64
//   - FLOAT64 / FLOAT   -> float64
//   - BOOL              -> boolean
//   - TIMESTAMP         -> microsecond timestamp with time zone "UTC"
//   - DATETIME          -> microsecond timestamp without a time zone
//
// Every other type, including STRING, JSON, GEOGRAPHY, DATE, TIME, BYTES,
// NUMERIC, BIGNUMERIC, STRUCT, RECORD and unknown names, is carried as an
// Arrow string.
func arrowTypeForBQ(t string) arrow.DataType {
	switch normalized := strings.ToUpper(strings.TrimSpace(t)); normalized {
	case bqTypeBOOL:
		return arrow.FixedWidthTypes.Boolean
	case bqTypeINT64, bqTypeINTEGER:
		return arrow.PrimitiveTypes.Int64
	case bqTypeFLOAT64, bqTypeFLOAT:
		return arrow.PrimitiveTypes.Float64
	case bqTypeDATETIME:
		return &arrow.TimestampType{Unit: arrow.Microsecond}
	case bqTypeTIMESTAMP:
		return &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "UTC"}
	}
	return arrow.BinaryTypes.String
}
// serializeArrowSchema converts schema to Arrow and wraps the serialized IPC
// schema message in a storage-API ArrowSchema. Serialization errors are
// returned unchanged.
func serializeArrowSchema(schema *enginepb.TableSchema) (*storagepb.ArrowSchema, error) {
	encoded, err := serializeArrowIPCSchema(arrowSchemaFromEngine(schema))
	if err != nil {
		return nil, err
	}
	result := &storagepb.ArrowSchema{}
	result.SerializedSchema = encoded
	return result, nil
}
// rowsToArrowBatch encodes rows as a single Arrow IPC record batch laid out
// according to schema.
//
// Columns are filled one at a time by appendColumnValues; the first cell that
// fails to parse aborts the batch with that error. RowCount in the result is
// len(rows).
func rowsToArrowBatch(
	schema *enginepb.TableSchema,
	rows []*enginepb.DataRow,
) (*storagepb.ArrowRecordBatch, error) {
	arrowSchema := arrowSchemaFromEngine(schema)
	builder := array.NewRecordBuilder(memory.NewGoAllocator(), arrowSchema)
	defer builder.Release()

	columns := schema.GetFields()
	for i := range columns {
		err := appendColumnValues(builder.Field(i), columns[i].GetType(), rows, i)
		if err != nil {
			return nil, err
		}
	}

	record := builder.NewRecord()
	defer record.Release()

	payload, err := serializeArrowIPCRecordBatch(arrowSchema, record)
	if err != nil {
		return nil, err
	}
	batch := &storagepb.ArrowRecordBatch{RowCount: int64(len(rows))}
	batch.SerializedRecordBatch = payload
	return batch, nil
}
// appendColumnValues appends column colIdx of every row to builder, choosing
// the typed appender that matches bqType (trimmed, case-insensitive). Types
// without a dedicated appender are written as strings. The builder must be
// the Arrow builder kind that the chosen appender expects.
func appendColumnValues(
	builder array.Builder,
	bqType string,
	rows []*enginepb.DataRow,
	colIdx int,
) error {
	appendFn := appendStringColumn
	switch strings.ToUpper(strings.TrimSpace(bqType)) {
	case bqTypeINT64, bqTypeINTEGER:
		appendFn = appendInt64Column
	case bqTypeFLOAT64, bqTypeFLOAT:
		appendFn = appendFloat64Column
	case bqTypeBOOL:
		appendFn = appendBoolColumn
	case bqTypeTIMESTAMP:
		appendFn = appendTimestampColumn
	case bqTypeDATETIME:
		appendFn = appendDatetimeColumn
	}
	return appendFn(builder, rows, colIdx)
}
// appendInt64Column appends column colIdx of each row to builder, which must
// be an *array.Int64Builder (any other builder panics). A missing cell or a
// cell flagged null becomes an Arrow null; otherwise the cell's string value
// is parsed as a base-10 int64. The first parse failure stops the column.
func appendInt64Column(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
	ints := builder.(*array.Int64Builder)
	for _, row := range rows {
		cells := row.GetCells()
		if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
			ints.AppendNull()
			continue
		}
		parsed, err := strconv.ParseInt(cells[colIdx].GetStringValue(), 10, 64)
		if err != nil {
			return fmt.Errorf("column %d INT64 parse: %w", colIdx, err)
		}
		ints.Append(parsed)
	}
	return nil
}
// appendFloat64Column appends column colIdx of each row to builder, which
// must be an *array.Float64Builder (any other builder panics). A missing cell
// or a cell flagged null becomes an Arrow null; otherwise the cell's string
// value is parsed as a float64. The first parse failure stops the column.
func appendFloat64Column(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
	floats := builder.(*array.Float64Builder)
	for _, row := range rows {
		cells := row.GetCells()
		if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
			floats.AppendNull()
			continue
		}
		parsed, err := strconv.ParseFloat(cells[colIdx].GetStringValue(), 64)
		if err != nil {
			return fmt.Errorf("column %d FLOAT64 parse: %w", colIdx, err)
		}
		floats.Append(parsed)
	}
	return nil
}
// appendBoolColumn appends column colIdx of each row to builder, which must
// be an *array.BooleanBuilder (any other builder panics). A missing cell or a
// cell flagged null becomes an Arrow null; otherwise the cell's string value
// is parsed with strconv.ParseBool. The first parse failure stops the column.
func appendBoolColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
	bools := builder.(*array.BooleanBuilder)
	for _, row := range rows {
		cells := row.GetCells()
		if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
			bools.AppendNull()
			continue
		}
		parsed, err := strconv.ParseBool(cells[colIdx].GetStringValue())
		if err != nil {
			return fmt.Errorf("column %d BOOL parse: %w", colIdx, err)
		}
		bools.Append(parsed)
	}
	return nil
}
// appendTimestampColumn appends column colIdx of each row to builder, which
// must be an *array.TimestampBuilder (any other builder panics). A missing
// cell or a cell flagged null becomes an Arrow null; otherwise the cell's
// string value is converted to microseconds by timestampCellToMicros. The
// first conversion failure stops the column.
func appendTimestampColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
	stamps := builder.(*array.TimestampBuilder)
	for _, row := range rows {
		cells := row.GetCells()
		if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
			stamps.AppendNull()
			continue
		}
		us, err := timestampCellToMicros(cells[colIdx].GetStringValue())
		if err != nil {
			return fmt.Errorf("column %d TIMESTAMP parse: %w", colIdx, err)
		}
		stamps.Append(arrow.Timestamp(us))
	}
	return nil
}
// appendDatetimeColumn appends column colIdx of each row to builder, which
// must be an *array.TimestampBuilder (any other builder panics). A missing
// cell or a cell flagged null becomes an Arrow null; otherwise the cell's
// string value is converted to microseconds by datetimeCellToMicros. The
// first conversion failure stops the column.
func appendDatetimeColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
	stamps := builder.(*array.TimestampBuilder)
	for _, row := range rows {
		cells := row.GetCells()
		if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
			stamps.AppendNull()
			continue
		}
		us, err := datetimeCellToMicros(cells[colIdx].GetStringValue())
		if err != nil {
			return fmt.Errorf("column %d DATETIME parse: %w", colIdx, err)
		}
		stamps.Append(arrow.Timestamp(us))
	}
	return nil
}
// appendStringColumn appends column colIdx of each row to builder, which must
// be an *array.StringBuilder (any other builder panics). A missing cell or a
// cell flagged null becomes an Arrow null; every other cell's string value is
// appended verbatim. It never returns an error.
func appendStringColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
	text := builder.(*array.StringBuilder)
	for _, row := range rows {
		cells := row.GetCells()
		if colIdx < len(cells) && !cells[colIdx].GetNullValue() {
			text.Append(cells[colIdx].GetStringValue())
			continue
		}
		text.AppendNull()
	}
	return nil
}
// timestampCellToMicros converts a TIMESTAMP cell string to an int64 number
// of microseconds. A blank input is rejected; otherwise the string is handed
// untrimmed to bqtypes.TimestampStringToMicros and its decimal result is
// parsed as a base-10 int64.
func timestampCellToMicros(s string) (int64, error) {
	if blank := strings.TrimSpace(s) == ""; blank {
		return 0, errors.New("empty timestamp")
	}
	digits, convErr := bqtypes.TimestampStringToMicros(s)
	if convErr != nil {
		return 0, convErr
	}
	return strconv.ParseInt(digits, 10, 64)
}
// datetimeCellToMicros converts a DATETIME cell such as
// "2024-01-02 03:04:05.678901" or "2024-01-02T03:04:05" to microseconds
// since 1970-01-01 00:00:00, reading the wall-clock value as UTC.
//
// Surrounding whitespace is ignored and the first "T" is treated as the
// date/time separator. Sub-microsecond digits are truncated. A blank input
// or an unparseable value returns an error.
func datetimeCellToMicros(s string) (int64, error) {
	value := strings.TrimSpace(s)
	if value == "" {
		return 0, errors.New("empty datetime")
	}
	value = strings.Replace(value, "T", " ", 1)

	var parseErr error
	for _, layout := range [...]string{
		"2006-01-02 15:04:05.999999",
		"2006-01-02 15:04:05",
	} {
		parsed, err := time.Parse(layout, value)
		if err != nil {
			parseErr = err
			continue
		}
		return parsed.UnixMicro(), nil
	}
	// Report the failure from the last layout tried.
	return 0, parseErr
}
package bqstorage
import (
"bytes"
"errors"
"fmt"
"io"
"github.com/apache/arrow/go/v18/arrow"
"github.com/apache/arrow/go/v18/arrow/ipc"
)
// countingReader wraps an io.Reader and keeps a running total of the bytes
// it has handed out, so callers can tell how far into the stream they are.
type countingReader struct {
	r   io.Reader // underlying source
	pos int64     // total bytes returned by Read so far
}

// Read forwards to the wrapped reader and adds the number of bytes it
// returned to pos, including on calls that also return an error.
func (c *countingReader) Read(p []byte) (n int, err error) {
	n, err = c.r.Read(p)
	c.pos += int64(n)
	return n, err
}
// ipcMessageAt returns the raw bytes of the index-th (zero-based) message in
// an Arrow IPC stream held in data.
//
// Messages are read in order through a countingReader, and the reader's
// position before and after each message marks that message's span in data.
// The result aliases data. Running out of messages before index is reached
// yields an "out of range" error; any other read error is returned unchanged.
func ipcMessageAt(data []byte, index int) ([]byte, error) {
	counter := &countingReader{r: bytes.NewReader(data)}
	messages := ipc.NewMessageReader(counter)
	defer messages.Release()

	for i := 0; ; i++ {
		begin := counter.pos
		msg, err := messages.Message()
		switch {
		case errors.Is(err, io.EOF):
			return nil, fmt.Errorf("arrow ipc: message index %d out of range", index)
		case err != nil:
			return nil, err
		}
		finish := counter.pos
		msg.Release()
		if i == index {
			return data[begin:finish], nil
		}
	}
}
// serializeArrowIPCSchema returns the bytes of the first message (index 0) of
// an IPC stream that is opened with schema as and closed without writing any
// record.
func serializeArrowIPCSchema(as *arrow.Schema) ([]byte, error) {
	buf := new(bytes.Buffer)
	writer := ipc.NewWriter(buf, ipc.WithSchema(as))
	if closeErr := writer.Close(); closeErr != nil {
		return nil, closeErr
	}
	return ipcMessageAt(buf.Bytes(), 0)
}
// serializeArrowIPCRecordBatch writes rec to an in-memory IPC stream with
// schema as and returns only the bytes of the record-batch message. The
// writer is closed on every path; a close error after a failed write is
// ignored in favour of the write error.
func serializeArrowIPCRecordBatch(as *arrow.Schema, rec arrow.Record) ([]byte, error) {
	buf := new(bytes.Buffer)
	writer := ipc.NewWriter(buf, ipc.WithSchema(as))

	writeErr := writer.Write(rec)
	closeErr := writer.Close()
	if writeErr != nil {
		return nil, writeErr
	}
	if closeErr != nil {
		return nil, closeErr
	}
	// Message 0 is the schema, message 1 the record batch; EOS follows.
	return ipcMessageAt(buf.Bytes(), 1)
}
package bqstorage
import (
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
"time"
"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
goavro "github.com/linkedin/goavro/v2"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// avroRecordName is the name given to the top-level Avro record that
// avroSchemaJSONFromEngine generates for a table schema.
const avroRecordName = "root"
// serializeAvroSchema renders schema as Avro schema JSON and wraps it in a
// storage-API AvroSchema. Conversion errors are returned unchanged.
func serializeAvroSchema(schema *enginepb.TableSchema) (*storagepb.AvroSchema, error) {
	definition, err := avroSchemaJSONFromEngine(schema)
	if err != nil {
		return nil, err
	}
	result := &storagepb.AvroSchema{}
	result.Schema = definition
	return result, nil
}
// avroSchemaJSONFromEngine renders schema as the JSON text of an Avro record
// named avroRecordName, with one Avro field per column in column order. It
// fails if any column cannot be converted or if the JSON cannot be marshaled.
func avroSchemaJSONFromEngine(schema *enginepb.TableSchema) (string, error) {
	columns := schema.GetFields()
	avroFields := make([]map[string]any, 0, len(columns))
	for i := range columns {
		converted, err := engineFieldToAvroField(columns[i])
		if err != nil {
			return "", err
		}
		avroFields = append(avroFields, converted)
	}

	encoded, err := json.Marshal(map[string]any{
		avroKeyType: avroTypeRecord,
		avroKeyName: avroRecordName,
		"fields":    avroFields,
	})
	if err != nil {
		return "", fmt.Errorf("marshal Avro schema: %w", err)
	}
	return string(encoded), nil
}
// engineFieldToAvroField builds the Avro field definition (name and type) for
// one column. Unless the column's mode, compared case-insensitively, equals
// bqModeRequired, the type is wrapped in a ["null", T] union. A nil field is
// an error.
func engineFieldToAvroField(f *enginepb.FieldSchema) (map[string]any, error) {
	if f == nil {
		return nil, errors.New("nil field schema")
	}
	avroType := bqTypeToAvroType(f.GetType())
	if required := strings.ToUpper(f.GetMode()) == bqModeRequired; !required {
		avroType = []any{"null", avroType}
	}
	field := map[string]any{}
	field[avroKeyName] = f.GetName()
	field[avroKeyType] = avroType
	return field, nil
}
func bqTypeToAvroType(t string) any {
switch strings.ToUpper(strings.TrimSpace(t)) {
case bqTypeBOOL:
return "boolean"
case bqTypeINT64, bqTypeINTEGER:
return avroTypeLong
case bqTypeFLOAT64, bqTypeFLOAT:
return "double"
case bqTypeBYTES:
return avroTypeBytes
case bqTypeSTRING:
return avroTypeString
case bqTypeDATE:
return map[string]any{avroKeyType: "int", avroKeyLogicalType: "date"}
case bqTypeDATETIME:
return map[string]any{avroKeyType: avroTypeString, avroKeyLogicalType: "datetime"}
case bqTypeTIMESTAMP:
return map[string]any{avroKeyType: avroTypeLong, avroKeyLogicalType: "timestamp-micros"}
case bqTypeTIME:
return map[string]any{avroKeyType: avroTypeLong, avroKeyLogicalType: "time-micros"}
case bqTypeNUMERIC:
return map[string]any{
avroKeyType: avroTypeBytes,
avroKeyLogicalType: "decimal",
"precision": 38,
"scale": 9,
}
case bqTypeBIGNUMERIC:
return map[string]any{
avroKeyType: avroTypeBytes,
avroKeyLogicalType: "decimal",
"precision": 77,
"scale": 38,
}
case bqTypeGEOGRAPHY:
return map[string]any{avroKeyType: avroTypeString, "sqlType": bqTypeGEOGRAPHY}
case bqTypeJSON:
return map[string]any{avroKeyType: avroTypeString, "sqlType": bqTypeJSON}
case bqTypeSTRUCT, bqTypeRECORD:
// Nested structs are lowered to string cells in the engine shim today.
return avroTypeString
default:
return avroTypeString
}
}
// rowsToAvroBatch encodes rows as concatenated Avro binary records using the
// Avro schema derived from schema. RowCount in the result is len(rows). The
// first row that cannot be converted or encoded aborts the batch.
func rowsToAvroBatch(
	schema *enginepb.TableSchema,
	rows []*enginepb.DataRow,
) (*storagepb.AvroRows, error) {
	definition, err := avroSchemaJSONFromEngine(schema)
	if err != nil {
		return nil, err
	}
	codec, err := goavro.NewCodec(definition)
	if err != nil {
		return nil, fmt.Errorf("create Avro codec: %w", err)
	}

	var payload []byte
	for i := range rows {
		datum, convErr := engineRowToAvroNative(schema, rows[i])
		if convErr != nil {
			return nil, convErr
		}
		rowBytes, encErr := codec.BinaryFromNative(nil, datum)
		if encErr != nil {
			return nil, fmt.Errorf("encode Avro row: %w", encErr)
		}
		payload = append(payload, rowBytes...)
	}

	batch := &storagepb.AvroRows{RowCount: int64(len(rows))}
	batch.SerializedBinaryRows = payload
	return batch, nil
}
// engineRowToAvroNative converts one engine row into a map keyed by column
// name. Columns beyond the end of the row's cells are treated as missing
// (nil cell). A conversion failure is wrapped with the column's name.
func engineRowToAvroNative(
	schema *enginepb.TableSchema,
	row *enginepb.DataRow,
) (map[string]any, error) {
	columns := schema.GetFields()
	cells := row.GetCells()
	record := make(map[string]any, len(columns))

	for i, col := range columns {
		var cell *enginepb.Cell
		if i < len(cells) {
			cell = cells[i]
		}
		native, err := cellToAvroNative(col, cell)
		if err != nil {
			return nil, fmt.Errorf("column %q: %w", col.GetName(), err)
		}
		record[col.GetName()] = native
	}
	return record, nil
}
// cellToAvroNative converts one cell to the Go value handed to the Avro
// encoder for field.
//
// A nil cell or a cell flagged null yields nil for a nullable column and an
// error for a required one. Otherwise the trimmed string value is parsed by
// type: BOOL -> bool, INT64/INTEGER -> int64, FLOAT64/FLOAT -> float64,
// TIMESTAMP -> int64 microseconds, DATE -> int32 days, BYTES -> []byte; all
// other types stay strings. For nullable columns the value is wrapped in a
// single-entry union map by unionNative.
//
// NOTE(review): TIME, NUMERIC and BIGNUMERIC are passed through as strings
// although bqTypeToAvroType declares them as long/bytes-backed types —
// confirm the encoder accepts that.
func cellToAvroNative(field *enginepb.FieldSchema, cell *enginepb.Cell) (any, error) {
	nullable := strings.ToUpper(field.GetMode()) != bqModeRequired
	if cell == nil || cell.GetNullValue() {
		if !nullable {
			return nil, errors.New("required column is null")
		}
		return nil, nil
	}

	text := strings.TrimSpace(cell.GetStringValue())
	kind := strings.ToUpper(strings.TrimSpace(field.GetType()))

	decoded, err := func() (any, error) {
		switch kind {
		case bqTypeBOOL:
			return strconv.ParseBool(text)
		case bqTypeINT64, bqTypeINTEGER:
			return strconv.ParseInt(text, 10, 64)
		case bqTypeFLOAT64, bqTypeFLOAT:
			return strconv.ParseFloat(text, 64)
		case bqTypeTIMESTAMP:
			return timestampCellToMicros(text)
		case bqTypeDATE:
			return dateStringToDays(text)
		case bqTypeBYTES:
			return []byte(text), nil
		}
		return text, nil
	}()
	if err != nil {
		return nil, err
	}
	if !nullable {
		return decoded, nil
	}
	return unionNative(kind, decoded), nil
}
// dateStringToDays converts a "YYYY-MM-DD" date (surrounding whitespace
// ignored) to the number of days since 1970-01-01, negative for earlier
// dates. An unparseable date returns the time.Parse error.
//
// The count is derived from Unix seconds rather than a time.Duration: a
// Duration saturates at roughly ±292 years, which would give wrong results
// for dates outside about 1677–2262 (for example 9999-12-31).
func dateStringToDays(s string) (int32, error) {
	t, err := time.Parse("2006-01-02", strings.TrimSpace(s))
	if err != nil {
		return 0, err
	}
	// A date-only layout parses to midnight UTC, so the Unix time is an exact
	// multiple of a day and integer division is exact for negative values too.
	const secondsPerDay = 24 * 60 * 60
	return int32(t.Unix() / secondsPerDay), nil
}
// unionNative wraps val in a single-entry map whose key is the Avro branch
// name chosen for bqType (trimmed, case-insensitive): "boolean" for BOOL,
// long for INT64/INTEGER/TIMESTAMP, "double" for FLOAT64/FLOAT, bytes for
// BYTES/NUMERIC/BIGNUMERIC, "int" for DATE, and string for everything else.
func unionNative(bqType string, val any) map[string]any {
	var branch string
	switch strings.ToUpper(strings.TrimSpace(bqType)) {
	case bqTypeBOOL:
		branch = "boolean"
	case bqTypeDATE:
		branch = "int"
	case bqTypeFLOAT64, bqTypeFLOAT:
		branch = "double"
	case bqTypeINT64, bqTypeINTEGER, bqTypeTIMESTAMP:
		branch = avroTypeLong
	case bqTypeBYTES, bqTypeNUMERIC, bqTypeBIGNUMERIC:
		branch = avroTypeBytes
	default:
		branch = avroTypeString
	}
	return map[string]any{branch: val}
}
package bqstorage
import (
"strings"
"time"
"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"google.golang.org/protobuf/types/known/timestamppb"
)
// engineCreateReadSessionRequest translates a public CreateReadSession request
// into the engine's request type. It copies the parent, the maximum stream
// count, the session's table and, when present, the read options (selected
// fields are copied into a fresh slice). A nil input yields nil.
func engineCreateReadSessionRequest(
	in *storagepb.CreateReadSessionRequest,
) *enginepb.CreateReadSessionRequest {
	if in == nil {
		return nil
	}
	req := &enginepb.CreateReadSessionRequest{
		Parent:         in.GetParent(),
		MaxStreamCount: in.GetMaxStreamCount(),
	}
	session := in.GetReadSession()
	if session == nil {
		return req
	}
	req.ReadSession = &enginepb.ReadSession{Table: session.GetTable()}
	if readOpts := session.GetReadOptions(); readOpts != nil {
		req.ReadSession.ReadOptions = &enginepb.ReadOptions{
			SelectedFields: append([]string(nil), readOpts.GetSelectedFields()...),
			RowRestriction: readOpts.GetRowRestriction(),
		}
	}
	return req
}
// publicReadSessionFromEngine builds the public ReadSession for an engine
// session, serializing the schema in the requested data format.
//
// For AVRO and ARROW a schema serialization failure is returned to the caller.
// Any other format is reported as ARROW, and in that case the Arrow schema is
// attached only if it serializes successfully. A nil input yields (nil, nil).
func publicReadSessionFromEngine(
	in *enginepb.ReadSession,
	dataFormat storagepb.DataFormat,
) (*storagepb.ReadSession, error) {
	if in == nil {
		return nil, nil
	}
	session := &storagepb.ReadSession{
		Name:  in.GetName(),
		Table: in.GetTable(),
	}
	if readOpts := in.GetReadOptions(); readOpts != nil {
		session.ReadOptions = &storagepb.ReadSession_TableReadOptions{
			SelectedFields: append([]string(nil), readOpts.GetSelectedFields()...),
			RowRestriction: readOpts.GetRowRestriction(),
		}
	}
	streams := in.GetStreams()
	for i := range streams {
		session.Streams = append(session.Streams, &storagepb.ReadStream{Name: streams[i].GetName()})
	}

	if dataFormat == storagepb.DataFormat_AVRO {
		encoded, err := serializeAvroSchema(in.GetSchema())
		if err != nil {
			return nil, err
		}
		session.Schema = &storagepb.ReadSession_AvroSchema{AvroSchema: encoded}
		session.DataFormat = storagepb.DataFormat_AVRO
		return session, nil
	}

	// Arrow: either requested explicitly or used as the fallback format.
	session.DataFormat = storagepb.DataFormat_ARROW
	encoded, err := serializeArrowSchema(in.GetSchema())
	switch {
	case err == nil:
		session.Schema = &storagepb.ReadSession_ArrowSchema{ArrowSchema: encoded}
	case dataFormat == storagepb.DataFormat_ARROW:
		// Only an explicit Arrow request surfaces the failure.
		return nil, err
	}
	return session, nil
}
// engineTableSchemaToPublic converts an engine table schema to the public
// storage schema, field by field. A nil input yields nil.
func engineTableSchemaToPublic(in *enginepb.TableSchema) *storagepb.TableSchema {
	if in == nil {
		return nil
	}
	public := &storagepb.TableSchema{}
	fields := in.GetFields()
	for i := range fields {
		public.Fields = append(public.Fields, engineFieldToPublic(fields[i]))
	}
	return public
}
// engineFieldToPublic converts a single engine field schema to its public
// counterpart, mapping the type and mode strings to the public enums. A nil
// input yields nil.
func engineFieldToPublic(f *enginepb.FieldSchema) *storagepb.TableFieldSchema {
	if f == nil {
		return nil
	}
	public := new(storagepb.TableFieldSchema)
	public.Name = f.GetName()
	public.Type = engineTypeToPublic(f.GetType())
	public.Mode = engineModeToPublic(f.GetMode())
	public.Description = f.GetDescription()
	return public
}
// engineTypeToPublic maps an engine type name to the public storage field
// type. The name is compared case-insensitively with surrounding whitespace
// ignored. The legacy aliases INTEGER and FLOAT are accepted alongside INT64
// and FLOAT64, matching how the rest of this package treats them. Unknown
// names fall back to STRING.
func engineTypeToPublic(t string) storagepb.TableFieldSchema_Type {
	switch strings.ToUpper(strings.TrimSpace(t)) {
	case bqTypeSTRING:
		return storagepb.TableFieldSchema_STRING
	case bqTypeBYTES:
		return storagepb.TableFieldSchema_BYTES
	case bqTypeINT64, bqTypeINTEGER:
		return storagepb.TableFieldSchema_INT64
	case bqTypeFLOAT64, bqTypeFLOAT:
		return storagepb.TableFieldSchema_DOUBLE
	case bqTypeBOOL:
		return storagepb.TableFieldSchema_BOOL
	case bqTypeTIMESTAMP:
		return storagepb.TableFieldSchema_TIMESTAMP
	case bqTypeDATE:
		return storagepb.TableFieldSchema_DATE
	case bqTypeTIME:
		return storagepb.TableFieldSchema_TIME
	case bqTypeDATETIME:
		return storagepb.TableFieldSchema_DATETIME
	case bqTypeNUMERIC:
		return storagepb.TableFieldSchema_NUMERIC
	case bqTypeBIGNUMERIC:
		return storagepb.TableFieldSchema_BIGNUMERIC
	case bqTypeJSON:
		return storagepb.TableFieldSchema_JSON
	case bqTypeGEOGRAPHY:
		return storagepb.TableFieldSchema_GEOGRAPHY
	case bqTypeSTRUCT, bqTypeRECORD:
		return storagepb.TableFieldSchema_STRUCT
	default:
		// Unrecognized types are exposed as STRING rather than rejected.
		return storagepb.TableFieldSchema_STRING
	}
}
// engineModeToPublic maps an engine mode name (case-insensitive, surrounding
// whitespace ignored) to the public field mode. Anything other than REQUIRED
// or REPEATED is reported as NULLABLE.
func engineModeToPublic(m string) storagepb.TableFieldSchema_Mode {
	mode := strings.ToUpper(strings.TrimSpace(m))
	if mode == bqModeRequired {
		return storagepb.TableFieldSchema_REQUIRED
	}
	if mode == bqModeRepeated {
		return storagepb.TableFieldSchema_REPEATED
	}
	return storagepb.TableFieldSchema_NULLABLE
}
// publicWriteTypeToEngine maps a public write stream type to the engine's
// enum. PENDING and BUFFERED map to their counterparts; every other value,
// including COMMITTED, maps to COMMITTED.
func publicWriteTypeToEngine(t storagepb.WriteStream_Type) enginepb.WriteStream_Type {
	switch t {
	case storagepb.WriteStream_PENDING:
		return enginepb.WriteStream_PENDING
	case storagepb.WriteStream_BUFFERED:
		return enginepb.WriteStream_BUFFERED
	}
	return enginepb.WriteStream_COMMITTED
}
// engineWriteTypeToPublic maps an engine write stream type to the public
// enum. Values other than COMMITTED, PENDING and BUFFERED are reported as
// TYPE_UNSPECIFIED.
func engineWriteTypeToPublic(t enginepb.WriteStream_Type) storagepb.WriteStream_Type {
	public := storagepb.WriteStream_TYPE_UNSPECIFIED
	switch t {
	case enginepb.WriteStream_COMMITTED:
		public = storagepb.WriteStream_COMMITTED
	case enginepb.WriteStream_PENDING:
		public = storagepb.WriteStream_PENDING
	case enginepb.WriteStream_BUFFERED:
		public = storagepb.WriteStream_BUFFERED
	}
	return public
}
// publicWriteStreamFromEngine converts an engine write stream to the public
// WriteStream: name, type and table schema are translated, and the creation
// time is set when the engine's value parses as RFC 3339. An empty or
// unparsable creation time leaves CreateTime unset. A nil input yields nil.
func publicWriteStreamFromEngine(in *enginepb.WriteStream) *storagepb.WriteStream {
	if in == nil {
		return nil
	}
	public := &storagepb.WriteStream{
		Name:        in.GetName(),
		Type:        engineWriteTypeToPublic(in.GetType()),
		TableSchema: engineTableSchemaToPublic(in.GetSchema()),
	}
	created := in.GetCreateTime()
	if created == "" {
		return public
	}
	parsed, err := time.Parse(time.RFC3339, created)
	if err != nil {
		// Best effort: a malformed timestamp is dropped, not reported.
		return public
	}
	public.CreateTime = timestamppb.New(parsed)
	return public
}
// engineWriteStreamFromPublic builds the engine write stream for a public
// one. Only the stream type is carried over. A nil input yields nil.
func engineWriteStreamFromPublic(in *storagepb.WriteStream) *enginepb.WriteStream {
	if in == nil {
		return nil
	}
	converted := new(enginepb.WriteStream)
	converted.Type = publicWriteTypeToEngine(in.GetType())
	return converted
}
package bqstorage
import "math"
// uint64ToSignedInt64 converts v to int64, clamping values that do not fit to
// math.MaxInt64 instead of wrapping around to a negative number.
func uint64ToSignedInt64(v uint64) int64 {
	const limit = uint64(math.MaxInt64)
	if v <= limit {
		return int64(v)
	}
	return math.MaxInt64
}
package bqstorage
import (
"context"
"errors"
"fmt"
"strconv"
"strings"
"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protodesc"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/types/descriptorpb"
"google.golang.org/protobuf/types/dynamicpb"
)
// protoDataToEngineRows decodes the serialized proto rows of an AppendRows
// request into engine data rows.
//
// The row descriptor is obtained through resolveProtoDescriptor (which may
// consult or update *cachedDesc), each serialized row is unmarshalled into a
// dynamic message of that type, and the message is converted with
// dynamicMessageToDataRow. Missing data or an empty row list yields
// (nil, nil). Errors for an individual row are prefixed with its index.
func protoDataToEngineRows(
	ctx context.Context,
	engineClient *engine.Client,
	writeStream string,
	data *storagepb.AppendRowsRequest_ProtoData,
	cachedDesc **descriptorpb.DescriptorProto,
) ([]*enginepb.DataRow, error) {
	serialized := data.GetRows().GetSerializedRows()
	if len(serialized) == 0 {
		return nil, nil
	}

	descProto, err := resolveProtoDescriptor(ctx, engineClient, writeStream, data, cachedDesc)
	if err != nil {
		return nil, err
	}
	rowType, err := messageDescriptor(descProto)
	if err != nil {
		return nil, err
	}

	decoded := make([]*enginepb.DataRow, 0, len(serialized))
	for idx, payload := range serialized {
		message := dynamicpb.NewMessage(rowType)
		if unmarshalErr := proto.Unmarshal(payload, message); unmarshalErr != nil {
			return nil, fmt.Errorf("row %d unmarshal: %w", idx, unmarshalErr)
		}
		dataRow, decodeErr := dynamicMessageToDataRow(message)
		if decodeErr != nil {
			return nil, fmt.Errorf("row %d decode: %w", idx, decodeErr)
		}
		decoded = append(decoded, dataRow)
	}
	return decoded, nil
}
// resolveProtoDescriptor picks the proto descriptor used to decode appended
// rows, trying in order:
//
//  1. the writer schema carried by data (also stored into *cachedDesc);
//  2. the descriptor already held in *cachedDesc;
//  3. a descriptor synthesized from the write stream's table schema, fetched
//     from the engine via GetWriteStream (also stored into *cachedDesc).
//
// A nil cachedDesc pointer, an unusable engine client, an empty stream name,
// a failed lookup or a schema without fields all result in a
// "missing writer_schema.proto_descriptor" error.
func resolveProtoDescriptor(
	ctx context.Context,
	engineClient *engine.Client,
	writeStream string,
	data *storagepb.AppendRowsRequest_ProtoData,
	cachedDesc **descriptorpb.DescriptorProto,
) (*descriptorpb.DescriptorProto, error) {
	const missing = "proto_rows missing writer_schema.proto_descriptor"

	if cachedDesc == nil {
		return nil, errors.New(missing)
	}
	if inline := data.GetWriterSchema().GetProtoDescriptor(); inline != nil {
		*cachedDesc = inline
		return inline, nil
	}
	if cached := *cachedDesc; cached != nil {
		return cached, nil
	}

	canFetch := engineClient != nil && engineClient.StorageWrite != nil && writeStream != ""
	if !canFetch {
		return nil, errors.New(missing)
	}
	lookup := &enginepb.GetWriteStreamRequest{Name: writeStream}
	stream, err := engineClient.StorageWrite.GetWriteStream(ctx, lookup)
	if err != nil {
		return nil, fmt.Errorf("proto_rows missing writer_schema.proto_descriptor (GetWriteStream: %w)", err)
	}
	synthesized := descriptorFromEngineTableSchema(stream.GetSchema())
	if synthesized == nil {
		return nil, errors.New(missing)
	}
	*cachedDesc = synthesized
	return synthesized, nil
}
// descriptorFromEngineTableSchema synthesizes a flat proto message descriptor
// named "Row" from an engine table schema. Each non-nil field becomes a proto
// field whose number is its 1-based position in the schema and whose label
// and type come from engineModeToProtoLabel and engineTypeToProtoType.
// It returns nil when the schema is nil or contributes no fields.
func descriptorFromEngineTableSchema(schema *enginepb.TableSchema) *descriptorpb.DescriptorProto {
	if schema == nil {
		return nil
	}
	columns := schema.GetFields()
	if len(columns) == 0 {
		return nil
	}
	var protoFields []*descriptorpb.FieldDescriptorProto
	for idx, col := range columns {
		if col == nil {
			continue
		}
		protoFields = append(protoFields, &descriptorpb.FieldDescriptorProto{
			Name:   new(col.GetName()),
			Number: new(int32(idx + 1)),
			Label:  engineModeToProtoLabel(col.GetMode()),
			Type:   engineTypeToProtoType(col.GetType()),
		})
	}
	if len(protoFields) == 0 {
		return nil
	}
	return &descriptorpb.DescriptorProto{
		Name:  new("Row"),
		Field: protoFields,
	}
}
// engineModeToProtoLabel maps an engine mode name (case-insensitive,
// surrounding whitespace ignored) to a proto field label: REQUIRED and
// REPEATED map to their labels, everything else to OPTIONAL.
func engineModeToProtoLabel(mode string) *descriptorpb.FieldDescriptorProto_Label {
	label := descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL
	switch strings.ToUpper(strings.TrimSpace(mode)) {
	case bqModeRequired:
		label = descriptorpb.FieldDescriptorProto_LABEL_REQUIRED
	case bqModeRepeated:
		label = descriptorpb.FieldDescriptorProto_LABEL_REPEATED
	}
	return label.Enum()
}
// engineTypeToProtoType maps an engine type name (case-insensitive,
// surrounding whitespace ignored) to the proto field type used in the
// synthesized row descriptor:
//
//   - BOOL -> bool
//   - INT64 / INTEGER / TIMESTAMP -> int64
//   - FLOAT64 / FLOAT / DOUBLE -> double
//   - BYTES -> bytes
//   - DATE -> int32
//   - DATETIME, TIME and every other type -> string
//
// The legacy FLOAT alias is accepted alongside FLOAT64, consistent with the
// INT64/INTEGER pair and with how the rest of the package treats FLOAT.
func engineTypeToProtoType(t string) *descriptorpb.FieldDescriptorProto_Type {
	switch strings.ToUpper(strings.TrimSpace(t)) {
	case bqTypeBOOL:
		return descriptorpb.FieldDescriptorProto_TYPE_BOOL.Enum()
	case bqTypeINT64, bqTypeINTEGER:
		return descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum()
	case bqTypeFLOAT64, bqTypeFLOAT, "DOUBLE":
		return descriptorpb.FieldDescriptorProto_TYPE_DOUBLE.Enum()
	case bqTypeBYTES:
		return descriptorpb.FieldDescriptorProto_TYPE_BYTES.Enum()
	case bqTypeDATE:
		return descriptorpb.FieldDescriptorProto_TYPE_INT32.Enum()
	case bqTypeTIMESTAMP:
		return descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum()
	case bqTypeDATETIME, bqTypeTIME:
		// Listed explicitly to document that these travel as text.
		return descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum()
	default:
		return descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum()
	}
}
// messageDescriptor turns a standalone DescriptorProto into a usable message
// descriptor by wrapping it in a synthetic file ("bqstorage_row.proto",
// package "bqstorage") and looking the message up by name. It fails for a nil
// descriptor, when the file cannot be built, or when the message is not found.
func messageDescriptor(desc *descriptorpb.DescriptorProto) (protoreflect.MessageDescriptor, error) {
	if desc == nil {
		return nil, errors.New("nil descriptor")
	}
	file, err := protodesc.NewFile(&descriptorpb.FileDescriptorProto{
		Name:        new("bqstorage_row.proto"),
		Package:     new("bqstorage"),
		MessageType: []*descriptorpb.DescriptorProto{desc},
	}, nil)
	if err != nil {
		return nil, err
	}
	name := desc.GetName()
	if found := file.Messages().ByName(protoreflect.Name(name)); found != nil {
		return found, nil
	}
	return nil, fmt.Errorf("descriptor %q not found in file", name)
}
// dynamicMessageToDataRow converts a decoded row message into an engine data
// row with one cell per declared field, in descriptor order. The first field
// conversion error aborts the row.
func dynamicMessageToDataRow(msg protoreflect.Message) (*enginepb.DataRow, error) {
	declared := msg.Descriptor().Fields()
	count := declared.Len()
	cells := make([]*enginepb.Cell, count)
	for idx := 0; idx < count; idx++ {
		cell, err := fieldDescriptorToCell(msg, declared.Get(idx))
		if err != nil {
			return nil, err
		}
		cells[idx] = cell
	}
	return &enginepb.DataRow{Cells: cells}, nil
}
// fieldDescriptorToCell converts the value of field fd in msg to an engine
// cell:
//
//   - list fields become an array cell (empty when the field is not populated);
//   - any other field that is not populated becomes a null cell;
//   - populated message fields become struct cells;
//   - populated scalar fields go through protoreflectValueToCell.
func fieldDescriptorToCell(msg protoreflect.Message, fd protoreflect.FieldDescriptor) (*enginepb.Cell, error) {
	populated := msg.Has(fd)
	switch {
	case fd.IsList():
		array := &enginepb.Array{}
		if populated {
			values := msg.Get(fd).List()
			array.Elements = make([]*enginepb.Cell, values.Len())
			for idx := 0; idx < values.Len(); idx++ {
				element, err := protoreflectValueToCell(values.Get(idx), fd)
				if err != nil {
					return nil, err
				}
				array.Elements[idx] = element
			}
		}
		return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: array}}, nil
	case !populated:
		return &enginepb.Cell{Value: &enginepb.Cell_NullValue{NullValue: true}}, nil
	case fd.Kind() == protoreflect.MessageKind:
		return messageToStructCell(msg.Get(fd).Message())
	default:
		return protoreflectValueToCell(msg.Get(fd), fd)
	}
}
// messageToStructCell converts a nested message into a struct cell holding
// one cell per declared field, in descriptor order. The first field
// conversion error aborts the conversion.
func messageToStructCell(msg protoreflect.Message) (*enginepb.Cell, error) {
	declared := msg.Descriptor().Fields()
	members := make([]*enginepb.Cell, 0, declared.Len())
	for idx := 0; idx < declared.Len(); idx++ {
		member, err := fieldDescriptorToCell(msg, declared.Get(idx))
		if err != nil {
			return nil, err
		}
		members = append(members, member)
	}
	structValue := &enginepb.Struct{Fields: members}
	return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: structValue}}, nil
}
// protoreflectValueToCell converts a single proto value of field fd's kind
// into an engine cell. Messages become struct cells; every scalar is rendered
// as a string cell:
//
//   - bool via strconv.FormatBool;
//   - signed integers in base 10;
//   - unsigned integers in base 10 after clamping through uint64ToSignedInt64;
//   - float/double in shortest 'g' form at 32/64-bit precision;
//   - string as-is and bytes reinterpreted as a string.
//
// Any other kind is rejected with an error.
func protoreflectValueToCell(v protoreflect.Value, fd protoreflect.FieldDescriptor) (*enginepb.Cell, error) {
	// textCell wraps an already formatted value in a string cell.
	textCell := func(text string) (*enginepb.Cell, error) {
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: text}}, nil
	}

	kind := fd.Kind()
	switch kind {
	case protoreflect.MessageKind:
		return messageToStructCell(v.Message())
	case protoreflect.BoolKind:
		return textCell(strconv.FormatBool(v.Bool()))
	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind,
		protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
		return int64Cell(v.Int()), nil
	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind,
		protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
		return int64Cell(uint64ToSignedInt64(v.Uint())), nil
	case protoreflect.FloatKind:
		return textCell(strconv.FormatFloat(float64(v.Float()), 'g', -1, 32))
	case protoreflect.DoubleKind:
		return textCell(strconv.FormatFloat(v.Float(), 'g', -1, 64))
	case protoreflect.StringKind:
		return textCell(v.String())
	case protoreflect.BytesKind:
		return textCell(string(v.Bytes()))
	}
	return nil, fmt.Errorf("unsupported proto field kind %v", kind)
}
// int64Cell returns a string cell holding n formatted in base 10.
func int64Cell(n int64) *enginepb.Cell {
	digits := strconv.FormatInt(n, 10)
	return &enginepb.Cell{
		Value: &enginepb.Cell_StringValue{StringValue: digits},
	}
}
package bqstorage
import (
"context"
"errors"
"io"
"strconv"
"strings"
"sync"
"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// readSessionState is what the server remembers about a read session it
// created: the engine schema and the data format chosen for the session.
type readSessionState struct {
// schema is the engine table schema returned when the session was created.
schema *enginepb.TableSchema
// dataFormat is the wire format (Arrow or Avro) used for this session's rows.
dataFormat storagepb.DataFormat
}
// ReadServer implements the public BigQueryRead gRPC service by adapting
// requests to the engine's internal StorageRead contract and encoding row
// pages in the session's data format (Arrow record batches or Avro rows).
type ReadServer struct {
storagepb.UnimplementedBigQueryReadServer
// engine is the client for the engine subprocess; calls fail with
// Unavailable when it or its StorageRead client is missing.
engine *engine.Client
// mu guards sessions.
mu sync.RWMutex
// sessions maps a read session name to its remembered state.
sessions map[string]*readSessionState
}
// requireEngine returns an Unavailable status error unless the server, its
// engine client and the engine's StorageRead client are all set.
func (s *ReadServer) requireEngine() error {
	if s != nil && s.engine != nil && s.engine.StorageRead != nil {
		return nil
	}
	return status.Error(codes.Unavailable, "BigQuery Storage Read API requires a running engine subprocess")
}
// rememberSession records the schema and data format of a read session under
// its name so later ReadRows calls can look them up. Calls with an empty name
// or a nil schema are ignored. The session map is created on first use.
func (s *ReadServer) rememberSession(
	name string,
	schema *enginepb.TableSchema,
	dataFormat storagepb.DataFormat,
) {
	if schema == nil || name == "" {
		return
	}
	entry := &readSessionState{schema: schema, dataFormat: dataFormat}

	s.mu.Lock()
	defer s.mu.Unlock()
	if s.sessions == nil {
		s.sessions = map[string]*readSessionState{}
	}
	s.sessions[name] = entry
}
// sessionState returns the remembered state for the session owning
// streamName, or nil if none is recorded. The session name is everything
// before the last "/streams/" in the stream name; a name without that marker
// is looked up unchanged.
func (s *ReadServer) sessionState(streamName string) *readSessionState {
	key := streamName
	if cut := strings.LastIndex(streamName, "/streams/"); cut != -1 {
		key = streamName[:cut]
	}
	s.mu.RLock()
	state := s.sessions[key]
	s.mu.RUnlock()
	return state
}
// CreateReadSession creates a read session in the engine and returns it in
// public form. The requested data format defaults to Arrow when unspecified;
// the session's schema and format are remembered for subsequent ReadRows
// calls. Engine errors are returned unchanged.
func (s *ReadServer) CreateReadSession(
	ctx context.Context,
	req *storagepb.CreateReadSessionRequest,
) (*storagepb.ReadSession, error) {
	if err := s.requireEngine(); err != nil {
		return nil, err
	}
	format := req.GetReadSession().GetDataFormat()
	if format == storagepb.DataFormat_DATA_FORMAT_UNSPECIFIED {
		format = storagepb.DataFormat_ARROW
	}
	created, err := s.engine.StorageRead.CreateReadSession(ctx, engineCreateReadSessionRequest(req))
	if err != nil {
		return nil, err
	}
	s.rememberSession(created.GetName(), created.GetSchema(), format)
	return publicReadSessionFromEngine(created, format)
}
// ReadRows opens the engine's row stream for the requested read stream and
// offset and relays its pages to the client. Pages are encoded in the format
// remembered for the owning session, or Arrow when no session state (or no
// format) is known.
func (s *ReadServer) ReadRows(
	req *storagepb.ReadRowsRequest,
	stream storagepb.BigQueryRead_ReadRowsServer,
) error {
	if err := s.requireEngine(); err != nil {
		return err
	}
	engineReq := &enginepb.ReadRowsRequest{
		ReadStream: req.GetReadStream(),
		Offset:     req.GetOffset(),
	}
	source, err := s.engine.StorageRead.ReadRows(stream.Context(), engineReq)
	if err != nil {
		return err
	}
	remembered := s.sessionState(req.GetReadStream())
	format := storagepb.DataFormat_ARROW
	if remembered != nil && remembered.dataFormat != storagepb.DataFormat_DATA_FORMAT_UNSPECIFIED {
		format = remembered.dataFormat
	}
	return s.pumpEngineReadRows(source, stream, remembered, format)
}
// pumpEngineReadRows forwards pages from the engine stream to the client
// until the engine reports io.EOF (returns nil) or an error occurs.
//
// Pages without rows are skipped. Each remaining page is encoded with the
// session schema when one is known, otherwise with a schema inferred from the
// page's first row. The serialized schema is attached only to the first
// response that is sent. Encoding failures are reported as Internal status
// errors; receive and send errors are returned unchanged.
func (s *ReadServer) pumpEngineReadRows(
	engineStream enginepb.StorageRead_ReadRowsClient,
	stream storagepb.BigQueryRead_ReadRowsServer,
	state *readSessionState,
	dataFormat storagepb.DataFormat,
) error {
	schemaAttached := false
	for {
		page, recvErr := engineStream.Recv()
		switch {
		case errors.Is(recvErr, io.EOF):
			return nil
		case recvErr != nil:
			return recvErr
		}

		rows := page.GetRows()
		if len(rows) == 0 {
			continue
		}

		var schema *enginepb.TableSchema
		if state != nil {
			schema = state.schema
		}
		if schema == nil {
			schema = inferSchemaFromRow(rows[0])
		}

		resp, encodeErr := readRowsResponseForFormat(dataFormat, schema, rows)
		if encodeErr != nil {
			return status.Errorf(codes.Internal, "encode ReadRows batch: %v", encodeErr)
		}
		if !schemaAttached {
			if schemaErr := attachReadRowsSchema(resp, dataFormat, schema); schemaErr != nil {
				return schemaErr
			}
			schemaAttached = true
		}
		if sendErr := stream.Send(resp); sendErr != nil {
			return sendErr
		}
	}
}
// attachReadRowsSchema serializes schema in the given data format and stores
// it on resp: an Avro schema for AVRO, an Arrow schema for every other
// format. Serialization failures are returned as Internal status errors.
func attachReadRowsSchema(
	resp *storagepb.ReadRowsResponse,
	dataFormat storagepb.DataFormat,
	schema *enginepb.TableSchema,
) error {
	if dataFormat == storagepb.DataFormat_AVRO {
		encoded, err := serializeAvroSchema(schema)
		if err != nil {
			return status.Errorf(codes.Internal, "encode Avro schema: %v", err)
		}
		resp.Schema = &storagepb.ReadRowsResponse_AvroSchema{AvroSchema: encoded}
		return nil
	}
	encoded, err := serializeArrowSchema(schema)
	if err != nil {
		return status.Errorf(codes.Internal, "encode Arrow schema: %v", err)
	}
	resp.Schema = &storagepb.ReadRowsResponse_ArrowSchema{ArrowSchema: encoded}
	return nil
}
// readRowsResponseForFormat encodes rows into a ReadRowsResponse: an Avro
// batch for AVRO, an Arrow record batch for every other format. RowCount is
// set to the number of rows. Encoding errors are returned unchanged.
func readRowsResponseForFormat(
	dataFormat storagepb.DataFormat,
	schema *enginepb.TableSchema,
	rows []*enginepb.DataRow,
) (*storagepb.ReadRowsResponse, error) {
	if dataFormat == storagepb.DataFormat_AVRO {
		avroBatch, err := rowsToAvroBatch(schema, rows)
		if err != nil {
			return nil, err
		}
		resp := &storagepb.ReadRowsResponse{RowCount: int64(len(rows))}
		resp.Rows = &storagepb.ReadRowsResponse_AvroRows{AvroRows: avroBatch}
		return resp, nil
	}
	arrowBatch, err := rowsToArrowBatch(schema, rows)
	if err != nil {
		return nil, err
	}
	resp := &storagepb.ReadRowsResponse{RowCount: int64(len(rows))}
	resp.Rows = &storagepb.ReadRowsResponse_ArrowRecordBatch{ArrowRecordBatch: arrowBatch}
	return resp, nil
}
// SplitReadStream forwards a split request (stream name and fraction) to the
// engine and returns the names of the resulting primary and remainder
// streams; a stream the engine omits is left unset in the response.
func (s *ReadServer) SplitReadStream(
	ctx context.Context,
	req *storagepb.SplitReadStreamRequest,
) (*storagepb.SplitReadStreamResponse, error) {
	if err := s.requireEngine(); err != nil {
		return nil, err
	}
	engineReq := &enginepb.SplitReadStreamRequest{
		Name:     req.GetName(),
		Fraction: req.GetFraction(),
	}
	split, err := s.engine.StorageRead.SplitReadStream(ctx, engineReq)
	if err != nil {
		return nil, err
	}
	result := new(storagepb.SplitReadStreamResponse)
	if first := split.GetPrimaryStream(); first != nil {
		result.PrimaryStream = &storagepb.ReadStream{Name: first.GetName()}
	}
	if rest := split.GetRemainderStream(); rest != nil {
		result.RemainderStream = &storagepb.ReadStream{Name: rest.GetName()}
	}
	return result, nil
}
// inferSchemaFromRow fabricates a schema for a row whose real schema is
// unknown: one nullable STRING column per cell, named by columnName. A nil
// row yields an empty schema.
func inferSchemaFromRow(row *enginepb.DataRow) *enginepb.TableSchema {
	inferred := &enginepb.TableSchema{}
	if row == nil {
		return inferred
	}
	cellCount := len(row.GetCells())
	for idx := 0; idx < cellCount; idx++ {
		column := &enginepb.FieldSchema{
			Name: columnName(idx),
			Type: bqTypeSTRING,
			Mode: bqModeNullable,
		}
		inferred.Fields = append(inferred.Fields, column)
	}
	return inferred
}
// columnName returns the placeholder name for the column at index i,
// "col_" followed by the decimal index.
func columnName(i int) string {
	const prefix = "col_"
	return prefix + strconv.FormatInt(int64(i), 10)
}
package bqstorage
import (
"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"google.golang.org/grpc"
)
// RegisterGRPC registers the public BigQuery Storage Read and Write services
// on srv, both backed by eng. The gateway calls it during startup so that
// client libraries dialing BIGQUERY_STORAGE_GRPC_ENDPOINT reach
// google.cloud.bigquery.storage.v1 rather than the engine-internal
// bigquery_emulator.v1.* service names. A nil srv is a no-op.
func RegisterGRPC(srv grpc.ServiceRegistrar, eng *engine.Client) {
	if srv != nil {
		storagepb.RegisterBigQueryReadServer(srv, &ReadServer{engine: eng})
		storagepb.RegisterBigQueryWriteServer(srv, &WriteServer{engine: eng})
	}
}
// Package bqstorage is the public BigQuery Storage gRPC shim. It registers
// google.cloud.bigquery.storage.v1.BigQueryRead / BigQueryWrite on the
// gateway listener and adapts RPCs to the engine's internal
// bigquery_emulator.v1.StorageRead / StorageWrite contracts.
package bqstorage
import (
"context"
"errors"
"io"
"strings"
"time"
"cloud.google.com/go/bigquery/storage/apiv1/storagepb"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/descriptorpb"
"google.golang.org/protobuf/types/known/timestamppb"
"google.golang.org/protobuf/types/known/wrapperspb"
)
// WriteServer implements the public BigQueryWrite gRPC service by forwarding
// each RPC to the engine's StorageWrite client and translating between the
// public and engine message types.
type WriteServer struct {
storagepb.UnimplementedBigQueryWriteServer
// engine is the client for the engine subprocess; calls fail with
// Unavailable when it or its StorageWrite client is missing.
engine *engine.Client
}
// requireEngine returns an Unavailable status error unless the server, its
// engine client and the engine's StorageWrite client are all set.
func (s *WriteServer) requireEngine() error {
	if s != nil && s.engine != nil && s.engine.StorageWrite != nil {
		return nil
	}
	return status.Error(codes.Unavailable, "BigQuery Storage Write API requires a running engine subprocess")
}
// CreateWriteStream creates a write stream under the requested parent.
//
// A request without a stream type, or with type COMMITTED, is answered with
// the table's reserved default stream (see defaultWriteStream). Any other
// type is created in the engine and returned in public form.
func (s *WriteServer) CreateWriteStream(
	ctx context.Context,
	req *storagepb.CreateWriteStreamRequest,
) (*storagepb.WriteStream, error) {
	if err := s.requireEngine(); err != nil {
		return nil, err
	}
	// A nil write stream reads as TYPE_UNSPECIFIED, which is treated as COMMITTED.
	requested := req.GetWriteStream().GetType()
	if requested == storagepb.WriteStream_TYPE_UNSPECIFIED ||
		requested == storagepb.WriteStream_COMMITTED {
		return s.defaultWriteStream(ctx, req.GetParent())
	}
	engineReq := &enginepb.CreateWriteStreamRequest{
		Parent:      req.GetParent(),
		WriteStream: engineWriteStreamFromPublic(req.GetWriteStream()),
	}
	created, err := s.engine.StorageWrite.CreateWriteStream(ctx, engineReq)
	if err != nil {
		return nil, err
	}
	return publicWriteStreamFromEngine(created), nil
}
// defaultWriteStreamName returns the name of the reserved default write
// stream for parent: the parent with trailing slashes removed, followed by
// "/streams/_default".
func defaultWriteStreamName(parent string) string {
	const suffix = "/streams/_default"
	trimmed := strings.TrimRight(parent, "/")
	return trimmed + suffix
}
// defaultWriteStream returns the public description of the reserved
// "_default" write stream for parent, always named with the default stream
// name and typed COMMITTED.
//
// If the engine already knows the stream, its metadata is used. Otherwise,
// whatever the lookup error was, a COMMITTED stream is created in the engine
// purely to obtain schema metadata; the engine registers the reserved
// _default stream lazily on the first AppendRows.
func (s *WriteServer) defaultWriteStream(
	ctx context.Context,
	parent string,
) (*storagepb.WriteStream, error) {
	name := defaultWriteStreamName(parent)

	// asDefault presents an engine stream under the default name and type.
	asDefault := func(ws *enginepb.WriteStream) *storagepb.WriteStream {
		public := publicWriteStreamFromEngine(ws)
		public.Name = name
		public.Type = storagepb.WriteStream_COMMITTED
		return public
	}

	lookup := &enginepb.GetWriteStreamRequest{Name: name}
	if existing, err := s.engine.StorageWrite.GetWriteStream(ctx, lookup); err == nil {
		return asDefault(existing), nil
	}

	probe, err := s.engine.StorageWrite.CreateWriteStream(ctx, &enginepb.CreateWriteStreamRequest{
		Parent:      parent,
		WriteStream: &enginepb.WriteStream{Type: enginepb.WriteStream_COMMITTED},
	})
	if err != nil {
		return nil, err
	}
	return asDefault(probe), nil
}
// AppendRows relays a bidirectional append stream between the client and the
// engine. Each client request is converted to the engine form, sent, and
// answered with the engine's next response before the following request is
// read. The proto descriptor resolved for the stream is cached across
// requests.
//
// The method returns nil when the client closes its side (io.EOF), an
// InvalidArgument status when a request cannot be converted, and any other
// receive or send error unchanged.
func (s *WriteServer) AppendRows(stream storagepb.BigQueryWrite_AppendRowsServer) error {
	if err := s.requireEngine(); err != nil {
		return err
	}
	ctx := stream.Context()
	upstream, err := s.engine.StorageWrite.AppendRows(ctx)
	if err != nil {
		return err
	}

	var descCache *descriptorpb.DescriptorProto
	for {
		clientReq, recvErr := stream.Recv()
		switch {
		case errors.Is(recvErr, io.EOF):
			return nil
		case recvErr != nil:
			return recvErr
		}

		upstreamReq, convErr := s.publicAppendRequestToEngine(ctx, clientReq, &descCache)
		if convErr != nil {
			return status.Errorf(codes.InvalidArgument, "decode AppendRows: %v", convErr)
		}
		if sendErr := upstream.Send(upstreamReq); sendErr != nil {
			return sendErr
		}
		upstreamResp, respErr := upstream.Recv()
		if respErr != nil {
			return respErr
		}
		reply := publicAppendResponseFromEngine(clientReq.GetWriteStream(), upstreamResp)
		if replyErr := stream.Send(reply); replyErr != nil {
			return replyErr
		}
	}
}
// GetWriteStream returns the named write stream from the engine. When the
// engine lookup fails and the name ends in "/streams/_default", the reserved
// default stream for the enclosing parent is returned instead; for any other
// name the engine's error is returned.
func (s *WriteServer) GetWriteStream(
	ctx context.Context,
	req *storagepb.GetWriteStreamRequest,
) (*storagepb.WriteStream, error) {
	if err := s.requireEngine(); err != nil {
		return nil, err
	}
	name := req.GetName()
	found, err := s.engine.StorageWrite.GetWriteStream(ctx, &enginepb.GetWriteStreamRequest{Name: name})
	if err == nil {
		return publicWriteStreamFromEngine(found), nil
	}
	parent, isDefault := strings.CutSuffix(name, "/streams/_default")
	if !isDefault {
		return nil, err
	}
	return s.defaultWriteStream(ctx, parent)
}
// FinalizeWriteStream finalizes the named stream in the engine and returns
// the row count the engine reports.
func (s *WriteServer) FinalizeWriteStream(
	ctx context.Context,
	req *storagepb.FinalizeWriteStreamRequest,
) (*storagepb.FinalizeWriteStreamResponse, error) {
	if err := s.requireEngine(); err != nil {
		return nil, err
	}
	engineReq := &enginepb.FinalizeWriteStreamRequest{Name: req.GetName()}
	finalized, err := s.engine.StorageWrite.FinalizeWriteStream(ctx, engineReq)
	if err != nil {
		return nil, err
	}
	result := new(storagepb.FinalizeWriteStreamResponse)
	result.RowCount = finalized.GetRowCount()
	return result, nil
}
// BatchCommitWriteStreams commits the listed write streams in the engine.
// The response's commit time is set when the engine's value parses as
// RFC 3339; an empty or unparsable value leaves it unset.
func (s *WriteServer) BatchCommitWriteStreams(
	ctx context.Context,
	req *storagepb.BatchCommitWriteStreamsRequest,
) (*storagepb.BatchCommitWriteStreamsResponse, error) {
	if err := s.requireEngine(); err != nil {
		return nil, err
	}
	engineReq := &enginepb.BatchCommitWriteStreamsRequest{
		Parent:       req.GetParent(),
		WriteStreams: append([]string(nil), req.GetWriteStreams()...),
	}
	committed, err := s.engine.StorageWrite.BatchCommitWriteStreams(ctx, engineReq)
	if err != nil {
		return nil, err
	}
	result := &storagepb.BatchCommitWriteStreamsResponse{}
	stamp := committed.GetCommitTime()
	if stamp == "" {
		return result, nil
	}
	// Best effort: a malformed timestamp is dropped, not reported.
	if when, parseErr := time.Parse(time.RFC3339, stamp); parseErr == nil {
		result.CommitTime = timestamppb.New(when)
	}
	return result, nil
}
// FlushRows forwards a flush request to the engine and returns the offset the
// engine reports. A request without an offset is sent with offset 0.
func (s *WriteServer) FlushRows(
	ctx context.Context,
	req *storagepb.FlushRowsRequest,
) (*storagepb.FlushRowsResponse, error) {
	if err := s.requireEngine(); err != nil {
		return nil, err
	}
	engineReq := &enginepb.FlushRowsRequest{
		WriteStream: req.GetWriteStream(),
		// The wrapper getter yields 0 when the offset is absent.
		Offset: req.GetOffset().GetValue(),
	}
	flushed, err := s.engine.StorageWrite.FlushRows(ctx, engineReq)
	if err != nil {
		return nil, err
	}
	return &storagepb.FlushRowsResponse{Offset: flushed.GetOffset()}, nil
}
// publicAppendRequestToEngine converts one public AppendRows request into the
// engine's request type. The write stream, trace id and offset (0 when
// absent) are copied. Proto rows are decoded into engine rows via
// protoDataToEngineRows, which may consult or update *cachedProtoDesc. Arrow
// rows are rejected with Unimplemented, a nil request with InvalidArgument.
// A request carrying no rows is passed through with only the header fields.
func (s *WriteServer) publicAppendRequestToEngine(
	ctx context.Context,
	req *storagepb.AppendRowsRequest,
	cachedProtoDesc **descriptorpb.DescriptorProto,
) (*enginepb.AppendRowsRequest, error) {
	if req == nil {
		return nil, status.Error(codes.InvalidArgument, "nil AppendRowsRequest")
	}
	converted := &enginepb.AppendRowsRequest{
		WriteStream: req.GetWriteStream(),
		TraceId:     req.GetTraceId(),
		Offset:      req.GetOffset().GetValue(),
	}
	switch payload := req.GetRows().(type) {
	case *storagepb.AppendRowsRequest_ArrowRows:
		return nil, status.Error(
			codes.Unimplemented,
			"Arrow AppendRows is not implemented by the emulator storage shim",
		)
	case *storagepb.AppendRowsRequest_ProtoRows:
		decoded, err := protoDataToEngineRows(
			ctx,
			s.engine,
			req.GetWriteStream(),
			payload.ProtoRows,
			cachedProtoDesc,
		)
		if err != nil {
			return nil, err
		}
		converted.ProtoRows = &enginepb.AppendRowsRequest_ProtoData{Rows: decoded}
	}
	return converted, nil
}
// publicAppendResponseFromEngine converts an engine append response into the
// public response for writeStream:
//
//   - a nil engine response yields a response with only the stream name;
//   - a non-empty engine error message becomes an InvalidArgument error status;
//   - otherwise an append result is returned, carrying the offset when the
//     engine supplied an append result.
func publicAppendResponseFromEngine(
	writeStream string,
	in *enginepb.AppendRowsResponse,
) *storagepb.AppendRowsResponse {
	resp := &storagepb.AppendRowsResponse{WriteStream: writeStream}
	if in == nil {
		return resp
	}
	if failure := in.GetErrorMessage(); failure != "" {
		resp.Response = &storagepb.AppendRowsResponse_Error{
			Error: status.New(codes.InvalidArgument, failure).Proto(),
		}
		return resp
	}
	var offset *wrapperspb.Int64Value
	if appended := in.GetAppendResult(); appended != nil {
		offset = wrapperspb.Int64(appended.GetOffset())
	}
	resp.Response = &storagepb.AppendRowsResponse_AppendResult_{
		AppendResult: &storagepb.AppendRowsResponse_AppendResult{Offset: offset},
	}
	return resp
}
package bqv2grpc
import (
"strconv"
"strings"
"time"
"cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/routines"
"google.golang.org/protobuf/types/known/wrapperspb"
)
// Resource kind strings placed in the Kind field of the API resources built
// by the conversion helpers in this file.
const (
// datasetKind is the Kind of dataset resources and dataset list entries.
datasetKind = "bigquery#dataset"
// tableKind is the Kind of table resources and table list entries.
tableKind = "bigquery#table"
// jobKind is presumably the Kind of job resources; its use is not in this
// part of the file.
jobKind = "bigquery#job"
)
// nowMillis returns the current wall-clock time as milliseconds since the
// Unix epoch.
func nowMillis() int64 {
	now := time.Now()
	return now.UnixMilli()
}
// datasetFromREST builds the gRPC Dataset message for a REST dataset.
//
// Defaults applied: nil labels become an empty map, an empty location becomes
// "US", and creation / last-modified times that parse to 0 are replaced by
// the current time. Friendly name and description are set only when
// non-empty. The Id is "<project>:<dataset>".
func datasetFromREST(projectID, datasetID string, ds bqtypes.Dataset) *bigquerypb.Dataset {
	labels := ds.Labels
	if labels == nil {
		labels = bqtypes.ResourceLabels{}
	}
	location := ds.Location
	if location == "" {
		location = "US"
	}
	created := parseMillis(ds.CreationTime)
	if created == 0 {
		created = nowMillis()
	}
	modified := parseMillis(ds.LastModifiedTime)
	if modified == 0 {
		modified = nowMillis()
	}

	pb := &bigquerypb.Dataset{
		Kind: datasetKind,
		Id:   projectID + ":" + datasetID,
		DatasetReference: &bigquerypb.DatasetReference{
			ProjectId: projectID,
			DatasetId: datasetID,
		},
		Location:         location,
		Labels:           map[string]string(labels),
		CreationTime:     created,
		LastModifiedTime: modified,
		Etag:             ds.Etag,
	}
	if name := ds.FriendlyName; name != "" {
		pb.FriendlyName = wrapperspb.String(name)
	}
	if desc := ds.Description; desc != "" {
		pb.Description = wrapperspb.String(desc)
	}
	return pb
}
// datasetToREST converts a gRPC Dataset message to its REST representation.
// Timestamps are rendered with formatMillis, Access is always an empty list,
// and the dataset reference is copied when present. A nil input yields the
// zero Dataset.
func datasetToREST(ds *bigquerypb.Dataset) bqtypes.Dataset {
	var rest bqtypes.Dataset
	if ds == nil {
		return rest
	}
	rest.Kind = ds.GetKind()
	rest.ID = ds.GetId()
	rest.FriendlyName = ds.GetFriendlyName().GetValue()
	rest.Description = ds.GetDescription().GetValue()
	rest.Location = ds.GetLocation()
	rest.Etag = ds.GetEtag()
	rest.CreationTime = formatMillis(ds.GetCreationTime())
	rest.LastModifiedTime = formatMillis(ds.GetLastModifiedTime())
	rest.Labels = bqtypes.ResourceLabels(ds.GetLabels())
	rest.Access = []map[string]any{}
	if ref := ds.GetDatasetReference(); ref != nil {
		rest.DatasetReference = bqtypes.DatasetReference{
			ProjectID: ref.GetProjectId(),
			DatasetID: ref.GetDatasetId(),
		}
	}
	return rest
}
// listDatasetFromRef builds the list-format entry for a dataset from its
// project and dataset ids. Nil labels are replaced by an empty map. The Id
// is "<project>:<dataset>".
func listDatasetFromRef(projectID, datasetID string, labels map[string]string) *bigquerypb.ListFormatDataset {
	if labels == nil {
		labels = make(map[string]string)
	}
	ref := &bigquerypb.DatasetReference{
		ProjectId: projectID,
		DatasetId: datasetID,
	}
	return &bigquerypb.ListFormatDataset{
		Kind:             datasetKind,
		Id:               projectID + ":" + datasetID,
		DatasetReference: ref,
		Labels:           labels,
	}
}
// tableFromREST builds the gRPC Table message for a REST table.
//
// Defaults applied: nil labels become an empty map, an empty type becomes
// tableTypeTable, an empty location becomes "US", and creation /
// last-modified times that parse to 0 are replaced by the current time.
// NumRows and NumBytes are set only when they parse to a positive number;
// friendly name and description only when non-empty. The Id is
// "<project>:<dataset>.<table>".
func tableFromREST(projectID, datasetID, tableID string, t bqtypes.Table) *bigquerypb.Table {
	labels := t.Labels
	if labels == nil {
		labels = bqtypes.ResourceLabels{}
	}
	tableType := t.Type
	if tableType == "" {
		tableType = tableTypeTable
	}
	location := t.Location
	if location == "" {
		location = "US"
	}
	created := parseMillis(t.CreationTime)
	if created == 0 {
		created = nowMillis()
	}
	modified := parseMillis(t.LastModifiedTime)
	if modified == 0 {
		modified = nowMillis()
	}

	pb := &bigquerypb.Table{
		Kind: tableKind,
		Id:   projectID + ":" + datasetID + "." + tableID,
		TableReference: &bigquerypb.TableReference{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		},
		Type:             tableType,
		Labels:           map[string]string(labels),
		CreationTime:     created,
		LastModifiedTime: uint64FromNonNegativeInt64(modified),
		Etag:             t.Etag,
		Location:         location,
		Schema:           schemaToProto(t.Schema),
	}
	if rowCount := parseInt64(t.NumRows); rowCount > 0 {
		pb.NumRows = wrapperspb.UInt64(uint64(rowCount))
	}
	if byteCount := parseInt64(t.NumBytes); byteCount > 0 {
		pb.NumBytes = wrapperspb.Int64(byteCount)
	}
	if name := t.FriendlyName; name != "" {
		pb.FriendlyName = wrapperspb.String(name)
	}
	if desc := t.Description; desc != "" {
		pb.Description = wrapperspb.String(desc)
	}
	return pb
}
// tableToREST converts a gRPC Table message to its REST representation.
// Timestamps and counters are rendered through the format* helpers, the
// schema through schemaFromProto, and the table reference is copied when
// present. A nil input yields the zero Table.
func tableToREST(t *bigquerypb.Table) bqtypes.Table {
	var rest bqtypes.Table
	if t == nil {
		return rest
	}
	rest.Kind = t.GetKind()
	rest.ID = t.GetId()
	rest.FriendlyName = t.GetFriendlyName().GetValue()
	rest.Description = t.GetDescription().GetValue()
	rest.Type = t.GetType()
	rest.Etag = t.GetEtag()
	rest.CreationTime = formatMillis(t.GetCreationTime())
	rest.LastModifiedTime = formatMillis(int64FromUint64(t.GetLastModifiedTime()))
	rest.NumRows = formatUInt64(t.GetNumRows().GetValue())
	rest.NumBytes = formatInt64(t.GetNumBytes().GetValue())
	rest.Location = t.GetLocation()
	rest.Labels = bqtypes.ResourceLabels(t.GetLabels())
	rest.Schema = schemaFromProto(t.GetSchema())
	if ref := t.GetTableReference(); ref != nil {
		rest.TableReference = bqtypes.TableReference{
			ProjectID: ref.GetProjectId(),
			DatasetID: ref.GetDatasetId(),
			TableID:   ref.GetTableId(),
		}
	}
	return rest
}
// listTableFromRef assembles the list-format entry for a single table.
// A nil labels map is replaced by an empty one and an empty tableType falls
// back to "TABLE".
func listTableFromRef(
	projectID, datasetID, tableID, tableType string,
	labels map[string]string,
) *bigquerypb.ListFormatTable {
	entry := &bigquerypb.ListFormatTable{
		Kind: tableKind,
		Id:   projectID + ":" + datasetID + "." + tableID,
		TableReference: &bigquerypb.TableReference{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		},
		Type:   tableType,
		Labels: labels,
	}
	if entry.Type == "" {
		entry.Type = "TABLE"
	}
	if entry.Labels == nil {
		entry.Labels = map[string]string{}
	}
	return entry
}
// schemaToProto converts a REST table schema into its proto form, converting
// every top-level field with fieldToProto. A nil schema maps to nil.
func schemaToProto(s *bqtypes.TableSchema) *bigquerypb.TableSchema {
	if s == nil {
		return nil
	}
	fields := make([]*bigquerypb.TableFieldSchema, len(s.Fields))
	for idx := range s.Fields {
		fields[idx] = fieldToProto(s.Fields[idx])
	}
	return &bigquerypb.TableSchema{Fields: fields}
}
// fieldToProto converts one REST field schema, including all nested
// sub-fields, into a proto TableFieldSchema. The description wrapper is set
// only for a non-empty description.
func fieldToProto(f bqtypes.TableFieldSchema) *bigquerypb.TableFieldSchema {
	pb := &bigquerypb.TableFieldSchema{Name: f.Name, Type: f.Type, Mode: f.Mode}
	if desc := f.Description; desc != "" {
		pb.Description = wrapperspb.String(desc)
	}
	// Recurse so nested fields are carried across.
	for _, child := range f.Fields {
		pb.Fields = append(pb.Fields, fieldToProto(child))
	}
	return pb
}
// schemaFromProto converts a proto table schema into the REST form. It
// returns nil when the schema is nil or has no fields.
func schemaFromProto(s *bigquerypb.TableSchema) *bqtypes.TableSchema {
	if s == nil {
		return nil
	}
	pbFields := s.GetFields()
	if len(pbFields) == 0 {
		return nil
	}
	fields := make([]bqtypes.TableFieldSchema, len(pbFields))
	for idx, pbField := range pbFields {
		fields[idx] = fieldFromProto(pbField)
	}
	return &bqtypes.TableSchema{Fields: fields}
}
// fieldFromProto converts one proto field schema, including nested
// sub-fields, into the REST form. The type goes through
// normalizeRESTFieldType, and a (case-insensitive) STRUCT is then reported as
// RECORD.
func fieldFromProto(f *bigquerypb.TableFieldSchema) bqtypes.TableFieldSchema {
	restType := normalizeRESTFieldType(f.GetType())
	if strings.EqualFold(restType, "STRUCT") {
		restType = "RECORD"
	}

	var children []bqtypes.TableFieldSchema
	for _, child := range f.GetFields() {
		children = append(children, fieldFromProto(child))
	}

	return bqtypes.TableFieldSchema{
		Name:        f.GetName(),
		Type:        restType,
		Mode:        f.GetMode(),
		Description: f.GetDescription().GetValue(),
		Fields:      children,
	}
}
// normalizeRESTFieldType maps the type names INT64, FLOAT64 and BOOL
// (matched after trimming whitespace and upper-casing) to INTEGER, FLOAT and
// BOOLEAN respectively. Any other value is returned exactly as given,
// without trimming or case changes.
func normalizeRESTFieldType(t string) string {
	key := strings.ToUpper(strings.TrimSpace(t))
	if key == "INT64" {
		return "INTEGER"
	}
	if key == "FLOAT64" {
		return "FLOAT"
	}
	if key == "BOOL" {
		return "BOOLEAN"
	}
	return t
}
// schemaToEngine converts a BigQuery proto schema into the engine's schema
// message, converting each top-level field with engineFieldFromProto.
// A nil schema maps to nil.
func schemaToEngine(s *bigquerypb.TableSchema) *enginepb.TableSchema {
	if s == nil {
		return nil
	}
	pbFields := s.GetFields()
	engineFields := make([]*enginepb.FieldSchema, len(pbFields))
	for idx, pbField := range pbFields {
		engineFields[idx] = engineFieldFromProto(pbField)
	}
	return &enginepb.TableSchema{Fields: engineFields}
}
// engineFieldFromProto converts one BigQuery proto field, together with all
// of its nested sub-fields, into the engine's FieldSchema message.
func engineFieldFromProto(f *bigquerypb.TableFieldSchema) *enginepb.FieldSchema {
	var children []*enginepb.FieldSchema
	for _, child := range f.GetFields() {
		children = append(children, engineFieldFromProto(child))
	}
	return &enginepb.FieldSchema{
		Name:        f.GetName(),
		Type:        f.GetType(),
		Mode:        f.GetMode(),
		Description: f.GetDescription().GetValue(),
		Fields:      children,
	}
}
// jobReferenceToProto converts a REST job reference into its proto form.
// The location wrapper is left nil when the reference has no location.
func jobReferenceToProto(ref bqtypes.JobReference) *bigquerypb.JobReference {
	pb := &bigquerypb.JobReference{ProjectId: ref.ProjectID, JobId: ref.JobID}
	if loc := ref.Location; loc != "" {
		pb.Location = wrapperspb.String(loc)
	}
	return pb
}
// jobListEntryToProto converts a registry job into the list-format proto
// entry. It returns nil for a nil job. The job state is reported both at the
// top level and inside Status; Configuration is populated (job type only)
// when the job has a configuration.
func jobListEntryToProto(j *jobs.Job) *bigquerypb.ListFormatJob {
	if j == nil {
		return nil
	}

	state := j.Status.State
	stats := &bigquerypb.JobStatistics{
		CreationTime: parseMillis(j.Statistics.CreationTime),
		StartTime:    parseMillis(j.Statistics.StartTime),
		EndTime:      parseMillis(j.Statistics.EndTime),
	}

	entry := &bigquerypb.ListFormatJob{
		Kind:         jobKind,
		Id:           j.ID,
		JobReference: jobReferenceToProto(j.JobReference),
		State:        state,
		Status:       &bigquerypb.JobStatus{State: state},
		Statistics:   stats,
	}
	if cfg := j.Configuration; cfg != nil {
		entry.Configuration = &bigquerypb.JobConfiguration{JobType: cfg.JobType}
	}
	return entry
}
// routineFromREST builds the proto Routine for
// projectID/datasetID/routineID from its REST-shaped counterpart.
// Timestamps that parse to zero fall back to nowMillis(), and an empty etag
// is replaced by a freshly minted one.
func routineFromREST(projectID, datasetID, routineID string, rt bqtypes.Routine) *bigquerypb.Routine {
	createdMs := parseMillis(rt.CreationTime)
	if createdMs == 0 {
		createdMs = nowMillis()
	}
	modifiedMs := parseMillis(rt.LastModifiedTime)
	if modifiedMs == 0 {
		modifiedMs = nowMillis()
	}

	pb := &bigquerypb.Routine{
		Etag: rt.Etag,
		RoutineReference: &bigquerypb.RoutineReference{
			ProjectId: projectID,
			DatasetId: datasetID,
			RoutineId: routineID,
		},
		RoutineType:      routineTypeToProto(string(rt.RoutineType)),
		Language:         routineLanguageToProto(string(rt.Language)),
		DefinitionBody:   rt.DefinitionBody,
		CreationTime:     createdMs,
		LastModifiedTime: modifiedMs,
	}
	if pb.Etag == "" {
		pb.Etag = routines.MintEtag()
	}
	return pb
}
// routineToREST converts a proto Routine into the REST-shaped
// bqtypes.Routine. A nil input yields the zero value; the routine reference
// is copied only when the proto carries one.
func routineToREST(rt *bigquerypb.Routine) bqtypes.Routine {
	if rt == nil {
		return bqtypes.Routine{}
	}

	var routineRef bqtypes.RoutineReference
	if pbRef := rt.GetRoutineReference(); pbRef != nil {
		routineRef = bqtypes.RoutineReference{
			ProjectID: pbRef.GetProjectId(),
			DatasetID: pbRef.GetDatasetId(),
			RoutineID: pbRef.GetRoutineId(),
		}
	}

	return bqtypes.Routine{
		Etag:             rt.GetEtag(),
		RoutineReference: routineRef,
		RoutineType:      bqtypes.RoutineType(routineTypeFromProto(rt.GetRoutineType())),
		Language:         bqtypes.RoutineLanguage(routineLanguageFromProto(rt.GetLanguage())),
		DefinitionBody:   rt.GetDefinitionBody(),
		CreationTime:     formatMillis(rt.GetCreationTime()),
		LastModifiedTime: formatMillis(rt.GetLastModifiedTime()),
	}
}
// routineTypeToProto maps a REST routine type name (trimmed, case-insensitive
// via upper-casing) to the proto enum. Anything other than PROCEDURE or
// TABLE_VALUED_FUNCTION maps to SCALAR_FUNCTION.
func routineTypeToProto(s string) bigquerypb.Routine_RoutineType {
	name := strings.ToUpper(strings.TrimSpace(s))
	if name == "PROCEDURE" {
		return bigquerypb.Routine_PROCEDURE
	}
	if name == "TABLE_VALUED_FUNCTION" {
		return bigquerypb.Routine_TABLE_VALUED_FUNCTION
	}
	return bigquerypb.Routine_SCALAR_FUNCTION
}
// routineTypeFromProto maps the proto routine type enum to its REST name.
// Every value other than PROCEDURE and TABLE_VALUED_FUNCTION is reported as
// "SCALAR_FUNCTION".
func routineTypeFromProto(t bigquerypb.Routine_RoutineType) string {
	if t == bigquerypb.Routine_PROCEDURE {
		return "PROCEDURE"
	}
	if t == bigquerypb.Routine_TABLE_VALUED_FUNCTION {
		return "TABLE_VALUED_FUNCTION"
	}
	return "SCALAR_FUNCTION"
}
// routineLanguageToProto maps a REST language name to the proto enum:
// "JAVASCRIPT" (case-insensitive) maps to JAVASCRIPT, everything else to SQL.
func routineLanguageToProto(s string) bigquerypb.Routine_Language {
	switch {
	case strings.EqualFold(s, "JAVASCRIPT"):
		return bigquerypb.Routine_JAVASCRIPT
	default:
		return bigquerypb.Routine_SQL
	}
}
// routineLanguageFromProto maps the proto language enum to its REST name:
// JAVASCRIPT maps to "JAVASCRIPT", every other value to "SQL".
func routineLanguageFromProto(l bigquerypb.Routine_Language) string {
	switch l {
	case bigquerypb.Routine_JAVASCRIPT:
		return "JAVASCRIPT"
	default:
		return "SQL"
	}
}
// parseInt64 parses s as a base-10 signed 64-bit integer and returns 0 when
// s is not a valid number (including empty or out-of-range input).
func parseInt64(s string) int64 {
	if value, err := strconv.ParseInt(s, 10, 64); err == nil {
		return value
	}
	return 0
}
// formatInt64 renders n as a base-10 string; zero is rendered as "0".
func formatInt64(n int64) string {
	return strconv.FormatInt(n, 10)
}
// formatUInt64 renders n as a base-10 string; zero is rendered as "0".
func formatUInt64(n uint64) string {
	return strconv.FormatUint(n, 10)
}
package bqv2grpc
import (
"context"
"cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"google.golang.org/protobuf/types/known/emptypb"
)
// DatasetServer implements google.cloud.bigquery.v2.DatasetService.
//
// It embeds the generated Unimplemented server, so any RPC without an
// explicit method on DatasetServer is answered by that embedded stub.
type DatasetServer struct {
bigquerypb.UnimplementedDatasetServiceServer
// deps holds the shared handler dependencies (catalog client, metadata
// store, ...) the dataset RPCs read from.
deps handlers.Dependencies
}
// newDatasetServer returns a DatasetServer that serves requests using deps.
func newDatasetServer(deps handlers.Dependencies) *DatasetServer {
	srv := new(DatasetServer)
	srv.deps = deps
	return srv
}
// ListDatasets lists the project's datasets as reported by the engine
// catalog. Labels come from the metadata overlay when one exists with
// non-nil labels; otherwise each entry gets an empty label map. Without a
// catalog the response is an empty list.
func (s *DatasetServer) ListDatasets(
	ctx context.Context,
	req *bigquerypb.ListDatasetsRequest,
) (*bigquerypb.DatasetList, error) {
	const listKind = "bigquery#datasetList"

	projectID := req.GetProjectId()
	if s.deps.Catalog == nil {
		empty := &bigquerypb.DatasetList{
			Kind:     listKind,
			Datasets: []*bigquerypb.ListFormatDataset{},
		}
		return empty, nil
	}

	listed, err := s.deps.Catalog.ListDatasets(ctx, &enginepb.ListDatasetsRequest{
		ProjectId: projectID,
	})
	if err != nil {
		return nil, grpcStatusFromEngine(err)
	}

	refs := listed.GetDatasets()
	entries := make([]*bigquerypb.ListFormatDataset, 0, len(refs))
	for _, ref := range refs {
		refProject, refDataset := ref.GetProjectId(), ref.GetDatasetId()
		labels := map[string]string{}
		overlay, found := s.deps.Metadata.GetDataset(refProject, refDataset)
		if found && overlay.Labels != nil {
			labels = map[string]string(overlay.Labels)
		}
		entries = append(entries, listDatasetFromRef(refProject, refDataset, labels))
	}

	return &bigquerypb.DatasetList{Kind: listKind, Datasets: entries}, nil
}
// InsertDataset registers a dataset in the engine catalog and stores its
// REST metadata. It rejects a request without a dataset ID, reports
// UNIMPLEMENTED when no catalog is configured, and registers the dataset
// with location "US" when the request does not specify one.
func (s *DatasetServer) InsertDataset(
	ctx context.Context,
	req *bigquerypb.InsertDatasetRequest,
) (*bigquerypb.Dataset, error) {
	projectID := req.GetProjectId()
	ds := datasetToREST(req.GetDataset())
	datasetID := ds.DatasetReference.DatasetID

	switch {
	case datasetID == "":
		return nil, invalidArg("datasetReference.datasetId is required")
	case s.deps.Catalog == nil:
		return nil, unimplemented("dataset insert requires an engine")
	}

	registerReq := &enginepb.RegisterDatasetRequest{
		Dataset: &enginepb.DatasetRef{
			ProjectId: projectID,
			DatasetId: datasetID,
		},
		Location: ds.Location,
	}
	if registerReq.Location == "" {
		registerReq.Location = "US"
	}
	if _, err := s.deps.Catalog.RegisterDataset(ctx, registerReq); err != nil {
		return nil, grpcStatusFromEngine(err)
	}

	// Only persist the overlay once the engine has accepted the dataset.
	s.deps.Metadata.PutDataset(projectID, datasetID, ds)
	return datasetFromREST(projectID, datasetID, ds), nil
}
// GetDataset returns dataset metadata. The dataset must exist according to
// catalogDatasetExists; the response is built from the stored metadata
// overlay when there is one, otherwise from an empty dataset.
func (s *DatasetServer) GetDataset(
	ctx context.Context,
	req *bigquerypb.GetDatasetRequest,
) (*bigquerypb.Dataset, error) {
	projectID, datasetID := req.GetProjectId(), req.GetDatasetId()

	found, err := catalogDatasetExists(ctx, s.deps, projectID, datasetID)
	switch {
	case err != nil:
		return nil, grpcStatusFromEngine(err)
	case !found:
		return nil, datasetNotFound(projectID, datasetID)
	}

	result := datasetToREST(&bigquerypb.Dataset{})
	if stored, hasOverlay := s.deps.Metadata.GetDataset(projectID, datasetID); hasOverlay {
		result = stored
	}
	return datasetFromREST(projectID, datasetID, result), nil
}
// UpdateDataset replaces the stored metadata of an existing dataset with the
// request's dataset and returns the result. A dataset unknown to
// catalogDatasetExists yields a not-found error.
func (s *DatasetServer) UpdateDataset(
	ctx context.Context,
	req *bigquerypb.UpdateOrPatchDatasetRequest,
) (*bigquerypb.Dataset, error) {
	projectID, datasetID := req.GetProjectId(), req.GetDatasetId()

	found, err := catalogDatasetExists(ctx, s.deps, projectID, datasetID)
	switch {
	case err != nil:
		return nil, grpcStatusFromEngine(err)
	case !found:
		return nil, datasetNotFound(projectID, datasetID)
	}

	replacement := datasetToREST(req.GetDataset())
	s.deps.Metadata.PutDataset(projectID, datasetID, replacement)
	return datasetFromREST(projectID, datasetID, replacement), nil
}
// PatchDataset merges the request's dataset into the stored metadata of an
// existing dataset. The response reflects the stored overlay after the merge
// when one can be read back, otherwise the patch itself.
func (s *DatasetServer) PatchDataset(
	ctx context.Context,
	req *bigquerypb.UpdateOrPatchDatasetRequest,
) (*bigquerypb.Dataset, error) {
	projectID, datasetID := req.GetProjectId(), req.GetDatasetId()

	found, err := catalogDatasetExists(ctx, s.deps, projectID, datasetID)
	switch {
	case err != nil:
		return nil, grpcStatusFromEngine(err)
	case !found:
		return nil, datasetNotFound(projectID, datasetID)
	}

	result := datasetToREST(req.GetDataset())
	s.deps.Metadata.MergeDataset(projectID, datasetID, result)
	if merged, hasOverlay := s.deps.Metadata.GetDataset(projectID, datasetID); hasOverlay {
		result = merged
	}
	return datasetFromREST(projectID, datasetID, result), nil
}
// DeleteDataset drops a dataset from the engine catalog and then removes its
// stored metadata. Table metadata under the dataset is removed as well when
// the request asks to delete contents. Without a catalog the call reports
// UNIMPLEMENTED.
func (s *DatasetServer) DeleteDataset(
	ctx context.Context,
	req *bigquerypb.DeleteDatasetRequest,
) (*emptypb.Empty, error) {
	projectID, datasetID := req.GetProjectId(), req.GetDatasetId()
	if s.deps.Catalog == nil {
		return nil, unimplemented("dataset delete requires an engine")
	}

	dropReq := &enginepb.DropDatasetRequest{
		Dataset: &enginepb.DatasetRef{
			ProjectId: projectID,
			DatasetId: datasetID,
		},
		DeleteContents: req.GetDeleteContents(),
	}
	if _, err := s.deps.Catalog.DropDataset(ctx, dropReq); err != nil {
		return nil, grpcStatusFromEngine(err)
	}

	metadata := s.deps.Metadata
	metadata.DeleteDataset(projectID, datasetID)
	if req.GetDeleteContents() {
		metadata.DeleteTablesInDataset(projectID, datasetID)
	}
	return &emptypb.Empty{}, nil
}
// catalogDatasetExists reports whether datasetID appears in the engine
// catalog's dataset listing for projectID. With no catalog configured it
// reports true so callers proceed without an engine.
func catalogDatasetExists(
	ctx context.Context,
	deps handlers.Dependencies,
	projectID, datasetID string,
) (bool, error) {
	if deps.Catalog == nil {
		return true, nil
	}

	listReq := &enginepb.ListDatasetsRequest{ProjectId: projectID}
	listed, err := deps.Catalog.ListDatasets(ctx, listReq)
	if err != nil {
		return false, err
	}

	found := false
	for _, ref := range listed.GetDatasets() {
		if ref.GetDatasetId() == datasetID {
			found = true
			break
		}
	}
	return found, nil
}
package bqv2grpc
import (
"regexp"
"strconv"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// Patterns for engine error messages of the form
// "<table|dataset> not found: a.b[.c]" and
// "<table|dataset> already exists: a.b[.c]". Capture groups: 1 is the
// resource noun, 2 and 3 are the first two dot-separated components and 4 is
// the optional third component. bqStyleMessage rewrites matching messages.
var (
notFoundResourceRE = regexp.MustCompile(
`^(table|dataset) not found: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
alreadyExistsResourceRE = regexp.MustCompile(
`^(table|dataset) already exists: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
)
// grpcStatusFromEngine maps engine gRPC errors to client-facing status codes.
//
// A nil error and an OK status map to nil. Errors that carry no gRPC status
// become Internal with an "Engine RPC failed" prefix. The listed codes are
// passed through unchanged; every other code is reported as Internal. In
// both of the latter cases the message goes through bqStyleMessage.
func grpcStatusFromEngine(err error) error {
	if err == nil {
		return nil
	}
	engineStatus, isStatus := status.FromError(err)
	if !isStatus {
		return status.Errorf(codes.Internal, "Engine RPC failed: %v", err)
	}

	code := engineStatus.Code()
	if code == codes.OK {
		return nil
	}
	message := bqStyleMessage(engineStatus.Message())

	switch code {
	case codes.NotFound, codes.AlreadyExists, codes.InvalidArgument,
		codes.FailedPrecondition, codes.PermissionDenied, codes.Unauthenticated,
		codes.Unimplemented, codes.Unavailable, codes.DeadlineExceeded,
		codes.ResourceExhausted:
		return status.Error(code, message)
	}
	return status.Errorf(codes.Internal, "%s", message)
}
// bqStyleMessage rewrites engine "not found" / "already exists" messages for
// tables and datasets via bqStyleResourceMessage. Messages that match
// neither pattern are returned unchanged.
func bqStyleMessage(msg string) string {
	rules := []struct {
		pattern *regexp.Regexp
		verb    string
	}{
		{notFoundResourceRE, "Not found"},
		{alreadyExistsResourceRE, "Already Exists"},
	}
	for _, rule := range rules {
		groups := rule.pattern.FindStringSubmatch(msg)
		if groups == nil {
			continue
		}
		return bqStyleResourceMessage(rule.verb, groups[1], groups[2], groups[3], groups[4])
	}
	return msg
}
// bqStyleResourceMessage formats "<verb>: <Noun> <project>:<dataset>[.<table>]".
// The nouns "table" and "dataset" are capitalized; any other noun is used as
// given. The ".<table>" suffix is added only for a non-empty table.
func bqStyleResourceMessage(verb, noun, project, dataset, table string) string {
	label := noun
	switch noun {
	case "table":
		label = "Table"
	case "dataset":
		label = "Dataset"
	}

	path := project + ":" + dataset
	if table != "" {
		path = path + "." + table
	}
	return verb + ": " + label + " " + path
}
// datasetNotFound returns a NotFound status error naming the dataset as
// "<project>:<dataset>".
func datasetNotFound(projectID, datasetID string) error {
	const format = "Not found: Dataset %s:%s"
	return status.Errorf(codes.NotFound, format, projectID, datasetID)
}
// routineNotFound returns a NotFound status error naming the routine as
// "<project>:<dataset>.<routine>".
func routineNotFound(projectID, datasetID, routineID string) error {
	const format = "Not found: Routine %s:%s.%s"
	return status.Errorf(codes.NotFound, format, projectID, datasetID, routineID)
}
// invalidArg returns an InvalidArgument status error carrying msg.
func invalidArg(msg string) error {
	return status.New(codes.InvalidArgument, msg).Err()
}
// unimplemented returns an Unimplemented status error carrying msg.
func unimplemented(msg string) error {
	return status.New(codes.Unimplemented, msg).Err()
}
// parseMillis parses a base-10 millisecond timestamp string and returns 0
// when s is not a valid signed 64-bit integer (including the empty string).
func parseMillis(s string) int64 {
	if millis, err := strconv.ParseInt(s, 10, 64); err == nil {
		return millis
	}
	return 0
}
// formatMillis renders a millisecond timestamp as a base-10 string. Zero is
// rendered as the empty string rather than "0".
func formatMillis(n int64) string {
	if n != 0 {
		return strconv.FormatInt(n, 10)
	}
	return ""
}
package bqv2grpc
import (
"context"
"cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
)
// JobServer implements google.cloud.bigquery.v2.JobService (ListJobs only).
//
// Every other JobService RPC is answered by the embedded Unimplemented
// server.
type JobServer struct {
bigquerypb.UnimplementedJobServiceServer
// deps holds the shared handler dependencies; ListJobs reads deps.Jobs.
deps handlers.Dependencies
}
// newJobServer returns a JobServer backed by deps. When deps carries no job
// registry, the server gets a fresh registry of its own so ListJobs always
// has one to query.
func newJobServer(deps handlers.Dependencies) *JobServer {
	srv := &JobServer{deps: deps}
	if srv.deps.Jobs == nil {
		srv.deps.Jobs = jobs.NewRegistry()
	}
	return srv
}
// ListJobs returns the project's jobs from the in-memory registry, applying
// the request's paging, parent-job, creation-time and state filters.
// Requests with allUsers=true are rejected as UNIMPLEMENTED.
func (s *JobServer) ListJobs(
	_ context.Context,
	req *bigquerypb.ListJobsRequest,
) (*bigquerypb.JobList, error) {
	if req.GetAllUsers() {
		return nil, unimplemented(
			"jobs.list with allUsers=true is not supported; " +
				"the emulator has no auth context to scope cross-user listings.")
	}

	matched, nextPageToken := s.deps.Jobs.ListByProject(req.GetProjectId(), jobs.ListOptions{
		MaxResults:      int(req.GetMaxResults().GetValue()),
		PageToken:       req.GetPageToken(),
		ParentJobID:     req.GetParentJobId(),
		MinCreationTime: int64FromUint64(req.GetMinCreationTime()),
		MaxCreationTime: int64FromUint64(req.GetMaxCreationTime().GetValue()),
		StateFilter:     stateFiltersFromProto(req.GetStateFilter()),
	})

	entries := make([]*bigquerypb.ListFormatJob, len(matched))
	for idx, job := range matched {
		entries[idx] = jobListEntryToProto(job)
	}

	// An empty token is the field's zero value, so it can be assigned
	// unconditionally.
	return &bigquerypb.JobList{
		Kind:          "bigquery#jobList",
		Jobs:          entries,
		NextPageToken: nextPageToken,
	}, nil
}
// stateFiltersFromProto translates proto state filters into the lowercase
// names "pending", "running" and "done". An empty input yields nil; filter
// values other than those three are dropped.
func stateFiltersFromProto(filters []bigquerypb.ListJobsRequest_StateFilter) []string {
	if len(filters) == 0 {
		return nil
	}
	names := map[bigquerypb.ListJobsRequest_StateFilter]string{
		bigquerypb.ListJobsRequest_PENDING: "pending",
		bigquerypb.ListJobsRequest_RUNNING: "running",
		bigquerypb.ListJobsRequest_DONE:    "done",
	}
	states := make([]string, 0, len(filters))
	for _, filter := range filters {
		if name, known := names[filter]; known {
			states = append(states, name)
		}
	}
	return states
}
package bqv2grpc
import "math"
// uint64FromNonNegativeInt64 converts v to uint64, clamping negative values
// to 0.
func uint64FromNonNegativeInt64(v int64) uint64 {
	if v >= 0 {
		return uint64(v)
	}
	return 0
}
// int64FromUint64 converts v to int64, clamping values above math.MaxInt64
// to math.MaxInt64.
func int64FromUint64(v uint64) int64 {
	if v <= uint64(math.MaxInt64) {
		return int64(v)
	}
	return math.MaxInt64
}
// int32FromInt converts v to int32, clamping it into the
// [math.MinInt32, math.MaxInt32] range.
func int32FromInt(v int) int32 {
	switch {
	case v > int(math.MaxInt32):
		return math.MaxInt32
	case v < int(math.MinInt32):
		return math.MinInt32
	default:
		return int32(v)
	}
}
package bqv2grpc
import (
"context"
"cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
)
// ProjectServer implements google.cloud.bigquery.v2.ProjectService.
//
// It holds no state of its own; RPCs without an explicit method here are
// answered by the embedded Unimplemented server.
type ProjectServer struct {
bigquerypb.UnimplementedProjectServiceServer
}
// newProjectServer returns a ProjectServer. The dependencies argument is
// accepted for symmetry with the other constructors and is not used.
func newProjectServer(_ handlers.Dependencies) *ProjectServer {
	return new(ProjectServer)
}
// GetServiceAccount returns the emulator's synthetic service account email,
// "bigquery-emulator@<project>.iam.gserviceaccount.com". A request without a
// project ID uses "test-project".
func (s *ProjectServer) GetServiceAccount(
	_ context.Context,
	req *bigquerypb.GetServiceAccountRequest,
) (*bigquerypb.GetServiceAccountResponse, error) {
	project := "test-project"
	if requested := req.GetProjectId(); requested != "" {
		project = requested
	}
	resp := &bigquerypb.GetServiceAccountResponse{
		Kind:  "bigquery#getServiceAccountResponse",
		Email: "bigquery-emulator@" + project + ".iam.gserviceaccount.com",
	}
	return resp, nil
}
package bqv2grpc
import (
"cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"google.golang.org/grpc"
)
// RegisterGRPC wires all BigQuery v2 gRPC services onto srv. Unimplemented
// methods on each embedded server return UNIMPLEMENTED automatically.
// A nil registrar makes the call a no-op.
func RegisterGRPC(srv grpc.ServiceRegistrar, deps handlers.Dependencies) {
	if srv == nil {
		return
	}

	// Services backed by the shared dependencies.
	bigquerypb.RegisterDatasetServiceServer(srv, newDatasetServer(deps))
	bigquerypb.RegisterTableServiceServer(srv, newTableServer(deps))
	bigquerypb.RegisterJobServiceServer(srv, newJobServer(deps))
	bigquerypb.RegisterProjectServiceServer(srv, newProjectServer(deps))
	bigquerypb.RegisterRoutineServiceServer(srv, newRoutineServer(deps))

	// Pure stubs: every RPC is answered by the embedded Unimplemented server.
	models := new(ModelServer)
	bigquerypb.RegisterModelServiceServer(srv, models)
	policies := new(RowAccessPolicyServer)
	bigquerypb.RegisterRowAccessPolicyServiceServer(srv, policies)
}
// ModelServer stubs google.cloud.bigquery.v2.ModelService. It defines no
// methods of its own, so every RPC is handled by the embedded Unimplemented
// server.
type ModelServer struct {
bigquerypb.UnimplementedModelServiceServer
}
// RowAccessPolicyServer stubs google.cloud.bigquery.v2.RowAccessPolicyService.
// It defines no methods of its own, so every RPC is handled by the embedded
// Unimplemented server.
type RowAccessPolicyServer struct {
bigquerypb.UnimplementedRowAccessPolicyServiceServer
}
package bqv2grpc
import (
"context"
"cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"github.com/vantaboard/bigquery-emulator/gateway/routines"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/types/known/emptypb"
)
// RoutineServer implements google.cloud.bigquery.v2.RoutineService.
//
// RPCs without an explicit method here are answered by the embedded
// Unimplemented server.
type RoutineServer struct {
bigquerypb.UnimplementedRoutineServiceServer
// deps holds the shared handler dependencies; the routine RPCs use
// deps.Routines as their store.
deps handlers.Dependencies
}
// newRoutineServer returns a RoutineServer backed by deps.
//
// When deps carries no routine store, one is created here, mirroring how
// newJobServer supplies a default job registry. Creating it up front means
// the store pointer is fixed before the server handles any RPC, rather than
// being assigned lazily from request handlers that may run concurrently.
func newRoutineServer(deps handlers.Dependencies) *RoutineServer {
	if deps.Routines == nil {
		deps.Routines = routines.NewStore()
	}
	return &RoutineServer{deps: deps}
}
// routineStore returns the server's routine store, creating and remembering
// a new one when the dependencies carry none.
//
// NOTE(review): the lazy assignment below is not guarded by any lock visible
// here — confirm callers cannot reach it concurrently with a nil store.
func (s *RoutineServer) routineStore() *routines.Store {
	if store := s.deps.Routines; store != nil {
		return store
	}
	s.deps.Routines = routines.NewStore()
	return s.deps.Routines
}
// ListRoutines returns the routines the in-memory store lists for the
// request's project, dataset and filter. Each entry is converted using the
// IDs from its own routine reference.
func (s *RoutineServer) ListRoutines(
	_ context.Context,
	req *bigquerypb.ListRoutinesRequest,
) (*bigquerypb.ListRoutinesResponse, error) {
	stored := s.routineStore().List(req.GetProjectId(), req.GetDatasetId(), req.GetFilter())

	converted := make([]*bigquerypb.Routine, 0, len(stored))
	for _, rt := range stored {
		ref := rt.RoutineReference
		converted = append(converted,
			routineFromREST(ref.ProjectID, ref.DatasetID, ref.RoutineID, rt))
	}
	return &bigquerypb.ListRoutinesResponse{Routines: converted}, nil
}
// GetRoutine returns a routine from the in-memory store, or a NotFound
// status error when the store has no such routine.
func (s *RoutineServer) GetRoutine(
	_ context.Context,
	req *bigquerypb.GetRoutineRequest,
) (*bigquerypb.Routine, error) {
	projectID, datasetID, routineID := req.GetProjectId(), req.GetDatasetId(), req.GetRoutineId()

	stored, found := s.routineStore().Get(projectID, datasetID, routineID)
	if found {
		return routineFromREST(projectID, datasetID, routineID, stored), nil
	}
	return nil, routineNotFound(projectID, datasetID, routineID)
}
// InsertRoutine registers a new routine. The routine ID and definition body
// are required; routine type and language default to SCALAR_FUNCTION and
// SQL. An AlreadyExists status error is returned when the store refuses the
// insert.
func (s *RoutineServer) InsertRoutine(
	_ context.Context,
	req *bigquerypb.InsertRoutineRequest,
) (*bigquerypb.Routine, error) {
	projectID, datasetID := req.GetProjectId(), req.GetDatasetId()
	rt := routineToREST(req.GetRoutine())
	routineID := rt.RoutineReference.RoutineID

	switch {
	case routineID == "":
		return nil, invalidArg("Required routineReference.routineId is missing.")
	case rt.DefinitionBody == "":
		return nil, invalidArg("Required definitionBody is missing.")
	}

	if rt.RoutineType == "" {
		rt.RoutineType = "SCALAR_FUNCTION"
	}
	if rt.Language == "" {
		rt.Language = "SQL"
	}

	// Round-trip through the proto form so the stored record carries the
	// request's project/dataset plus the defaulted timestamps and etag.
	created := routineFromREST(projectID, datasetID, routineID, rt)
	if inserted := s.routineStore().Insert(routineToREST(created)); !inserted {
		return nil, status.Errorf(codes.AlreadyExists,
			"Already Exists: Routine %s:%s.%s", projectID, datasetID, routineID)
	}
	return created, nil
}
// UpdateRoutine replaces an existing routine with the request's routine.
// The stored creation time is carried over and a new etag is minted. A
// routine unknown to the store yields a NotFound status error.
func (s *RoutineServer) UpdateRoutine(
	_ context.Context,
	req *bigquerypb.UpdateRoutineRequest,
) (*bigquerypb.Routine, error) {
	projectID, datasetID, routineID := req.GetProjectId(), req.GetDatasetId(), req.GetRoutineId()

	previous, found := s.routineStore().Get(projectID, datasetID, routineID)
	if !found {
		return nil, routineNotFound(projectID, datasetID, routineID)
	}

	updated := routineFromREST(projectID, datasetID, routineID, routineToREST(req.GetRoutine()))
	updated.CreationTime = parseMillis(previous.CreationTime)
	updated.Etag = routines.MintEtag()

	s.routineStore().Upsert(routineToREST(updated))
	return updated, nil
}
// DeleteRoutine removes a routine from the store and returns a NotFound
// status error when the store reports nothing was deleted.
func (s *RoutineServer) DeleteRoutine(
	_ context.Context,
	req *bigquerypb.DeleteRoutineRequest,
) (*emptypb.Empty, error) {
	projectID, datasetID, routineID := req.GetProjectId(), req.GetDatasetId(), req.GetRoutineId()

	if deleted := s.routineStore().Delete(projectID, datasetID, routineID); deleted {
		return &emptypb.Empty{}, nil
	}
	return nil, routineNotFound(projectID, datasetID, routineID)
}
package bqv2grpc
import (
"context"
"strconv"
"cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"google.golang.org/protobuf/types/known/emptypb"
"google.golang.org/protobuf/types/known/wrapperspb"
)
// TableServer implements google.cloud.bigquery.v2.TableService.
//
// RPCs without an explicit method here are answered by the embedded
// Unimplemented server.
type TableServer struct {
bigquerypb.UnimplementedTableServiceServer
// deps holds the shared handler dependencies (catalog client, metadata
// store, snapshots) the table RPCs use.
deps handlers.Dependencies
}
// newTableServer returns a TableServer that serves requests using deps.
func newTableServer(deps handlers.Dependencies) *TableServer {
	srv := new(TableServer)
	srv.deps = deps
	return srv
}
// ListTables lists the tables of a dataset as reported by the engine catalog.
//
// Each entry's labels come from the metadata overlay when it has non-nil
// labels, otherwise an empty map. The reported type is chosen with this
// precedence: the overlay's type, then the type the engine reports for the
// table, then the plain-table default. Without a catalog the response is an
// empty list with TotalItems 0.
func (s *TableServer) ListTables(
	ctx context.Context,
	req *bigquerypb.ListTablesRequest,
) (*bigquerypb.TableList, error) {
	projectID := req.GetProjectId()
	datasetID := req.GetDatasetId()
	if s.deps.Catalog == nil {
		return &bigquerypb.TableList{
			Kind:       "bigquery#tableList",
			Tables:     []*bigquerypb.ListFormatTable{},
			TotalItems: wrapperspb.Int32(0),
		}, nil
	}
	resp, err := s.deps.Catalog.ListTables(ctx, &enginepb.ListTablesRequest{
		Dataset: &enginepb.DatasetRef{
			ProjectId: projectID,
			DatasetId: datasetID,
		},
	})
	if err != nil {
		return nil, grpcStatusFromEngine(err)
	}
	items := make([]*bigquerypb.ListFormatTable, 0, len(resp.GetTables()))
	for _, ref := range resp.GetTables() {
		labels := map[string]string{}
		// Start from the engine-reported type so an overlay that carries
		// only labels (no type) does not turn e.g. a view into a TABLE.
		tableType := tableTypeTable
		if refType := ref.GetTableType(); refType != "" {
			tableType = refType
		}
		if overlay, ok := s.deps.Metadata.GetTable(
			ref.GetProjectId(), ref.GetDatasetId(), ref.GetTableId(),
		); ok {
			if overlay.Labels != nil {
				labels = map[string]string(overlay.Labels)
			}
			if overlay.Type != "" {
				tableType = overlay.Type
			}
		}
		items = append(items, listTableFromRef(
			ref.GetProjectId(), ref.GetDatasetId(), ref.GetTableId(), tableType, labels))
	}
	return &bigquerypb.TableList{
		Kind:       "bigquery#tableList",
		Tables:     items,
		TotalItems: wrapperspb.Int32(int32FromInt(len(items))),
	}, nil
}
// InsertTable registers a table in the engine catalog and stores its REST
// metadata.
//
// The table ID is required, and the call reports UNIMPLEMENTED when no
// catalog is configured. After the engine accepts the table, a non-empty
// default collation is applied to the schema's string fields, the metadata
// is stored, and, when snapshots are configured, the creation time is
// recorded.
func (s *TableServer) InsertTable(
	ctx context.Context,
	req *bigquerypb.InsertTableRequest,
) (*bigquerypb.Table, error) {
	projectID := req.GetProjectId()
	datasetID := req.GetDatasetId()
	t := tableToREST(req.GetTable())
	tableID := t.TableReference.TableID
	if tableID == "" {
		return nil, invalidArg("tableReference.tableId is required")
	}
	if s.deps.Catalog == nil {
		return nil, unimplemented("table insert requires an engine")
	}
	_, err := s.deps.Catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{
		Table: &enginepb.TableRef{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		},
		Schema: schemaToEngine(req.GetTable().GetSchema()),
	})
	if err != nil {
		return nil, grpcStatusFromEngine(err)
	}
	if t.DefaultCollation != "" {
		t.Schema = bqtypes.ApplyDefaultCollationToStringFields(t.Schema, t.DefaultCollation)
	}
	s.deps.Metadata.PutTable(projectID, datasetID, tableID, t)
	if s.deps.Snapshots != nil {
		// nowMillis() already yields the millisecond value; formatting it to
		// a string and parsing it back was a no-op round trip.
		s.deps.Snapshots.RecordCreation(projectID, datasetID, tableID, nowMillis())
	}
	return tableFromREST(projectID, datasetID, tableID, t), nil
}
// GetTable returns table metadata. The schema comes from the engine's
// DescribeTable, overlaid with stored metadata when present. The creation
// time comes from the snapshot store when it has one, and the row count from
// a ListRows call (falling back to "0" when that call fails and no count is
// set). Without a catalog a synthesized default table is returned.
func (s *TableServer) GetTable(
	ctx context.Context,
	req *bigquerypb.GetTableRequest,
) (*bigquerypb.Table, error) {
	projectID, datasetID, tableID := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()
	if s.deps.Catalog == nil {
		return tableFromREST(projectID, datasetID, tableID, bqtypes.Table{}), nil
	}

	// Each engine request gets its own reference message.
	newRef := func() *enginepb.TableRef {
		return &enginepb.TableRef{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		}
	}

	described, err := s.deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{
		Table: newRef(),
	})
	if err != nil {
		return nil, grpcStatusFromEngine(err)
	}

	result := bqtypes.Table{Schema: schemaFromProto(schemaToEngineProto(described.GetSchema()))}
	if stored, found := s.deps.Metadata.GetTable(projectID, datasetID, tableID); found {
		result = applyTableOverlay(result, stored)
	}
	if s.deps.Snapshots != nil {
		if createdMs, found := s.deps.Snapshots.CreationTimeMs(projectID, datasetID, tableID); found {
			result.CreationTime = strconv.FormatInt(createdMs, 10)
		}
	}

	rows, listErr := s.deps.Catalog.ListRows(ctx, &enginepb.ListRowsRequest{
		Table:      newRef(),
		StartIndex: 0,
		MaxResults: 0,
	})
	switch {
	case listErr == nil:
		result.NumRows = strconv.FormatInt(rows.GetTotalRows(), 10)
	case result.NumRows == "":
		result.NumRows = "0"
	}
	return tableFromREST(projectID, datasetID, tableID, result), nil
}
// UpdateTable replaces the stored metadata for the addressed table with the
// request's table and returns the result. The engine is not consulted.
func (s *TableServer) UpdateTable(
	ctx context.Context,
	req *bigquerypb.UpdateOrPatchTableRequest,
) (*bigquerypb.Table, error) {
	projectID, datasetID, tableID := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()

	replacement := tableToREST(req.GetTable())
	s.deps.Metadata.PutTable(projectID, datasetID, tableID, replacement)
	return tableFromREST(projectID, datasetID, tableID, replacement), nil
}
// PatchTable merges the request's table into the stored metadata. The
// response is the patch overlaid with the stored metadata when it can be
// read back after the merge.
func (s *TableServer) PatchTable(
	ctx context.Context,
	req *bigquerypb.UpdateOrPatchTableRequest,
) (*bigquerypb.Table, error) {
	projectID, datasetID, tableID := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()

	result := tableToREST(req.GetTable())
	s.deps.Metadata.MergeTable(projectID, datasetID, tableID, result)
	if merged, found := s.deps.Metadata.GetTable(projectID, datasetID, tableID); found {
		result = applyTableOverlay(result, merged)
	}
	return tableFromREST(projectID, datasetID, tableID, result), nil
}
// DeleteTable drops a table from the engine catalog and removes its stored
// metadata. When snapshots are configured, a pre-delete capture is attempted
// first; its result is deliberately ignored so a failed capture does not
// block the delete. Without a catalog the call reports UNIMPLEMENTED.
func (s *TableServer) DeleteTable(
	ctx context.Context,
	req *bigquerypb.DeleteTableRequest,
) (*emptypb.Empty, error) {
	projectID, datasetID, tableID := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()
	if s.deps.Catalog == nil {
		return nil, unimplemented("table delete requires an engine")
	}

	if s.deps.Snapshots != nil {
		// Best effort: the capture outcome is intentionally discarded.
		_ = s.deps.Snapshots.CaptureBeforeDelete(ctx, s.deps.Catalog,
			projectID, datasetID, tableID)
	}

	dropReq := &enginepb.DropTableRequest{
		Table: &enginepb.TableRef{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		},
	}
	if _, err := s.deps.Catalog.DropTable(ctx, dropReq); err != nil {
		return nil, grpcStatusFromEngine(err)
	}

	s.deps.Metadata.DeleteTable(projectID, datasetID, tableID)
	return &emptypb.Empty{}, nil
}
// schemaToEngineProto converts an engine table schema into the BigQuery
// proto schema. A nil schema maps to nil.
//
// Fields are converted recursively so nested sub-fields (for example the
// members of a RECORD column) are preserved, mirroring what
// engineFieldFromProto does in the opposite direction. The description
// wrapper is set only for a non-empty description.
func schemaToEngineProto(s *enginepb.TableSchema) *bigquerypb.TableSchema {
	if s == nil {
		return nil
	}

	// convert is declared before it is assigned so the closure can call
	// itself for nested fields.
	var convert func(f *enginepb.FieldSchema) *bigquerypb.TableFieldSchema
	convert = func(f *enginepb.FieldSchema) *bigquerypb.TableFieldSchema {
		field := &bigquerypb.TableFieldSchema{
			Name: f.GetName(),
			Type: f.GetType(),
			Mode: f.GetMode(),
		}
		if desc := f.GetDescription(); desc != "" {
			field.Description = wrapperspb.String(desc)
		}
		for _, sub := range f.GetFields() {
			field.Fields = append(field.Fields, convert(sub))
		}
		return field
	}

	out := &bigquerypb.TableSchema{Fields: make([]*bigquerypb.TableFieldSchema, 0, len(s.GetFields()))}
	for _, f := range s.GetFields() {
		out.Fields = append(out.Fields, convert(f))
	}
	return out
}
// applyTableOverlay returns base with every field that overlay actually sets
// copied over it: non-empty FriendlyName, Description, Type and
// DefaultCollation, and non-nil Labels and Schema. Fields the overlay leaves
// unset keep their base values.
func applyTableOverlay(base, overlay bqtypes.Table) bqtypes.Table {
	merged := base

	if v := overlay.FriendlyName; v != "" {
		merged.FriendlyName = v
	}
	if v := overlay.Description; v != "" {
		merged.Description = v
	}
	if v := overlay.Type; v != "" {
		merged.Type = v
	}
	if v := overlay.DefaultCollation; v != "" {
		merged.DefaultCollation = v
	}
	if overlay.Labels != nil {
		merged.Labels = overlay.Labels
	}
	if overlay.Schema != nil {
		merged.Schema = overlay.Schema
	}
	return merged
}
package handlers
import (
"context"
"fmt"
"net/http"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// catalogDatasetExists reports whether `datasetID` appears in the engine
// catalog's dataset listing for `projectID`. When Catalog is nil
// (gateway-only unit tests) it returns (true, nil) so handlers keep the
// legacy synthesized GET posture. A listing error is returned as
// (false, err).
func catalogDatasetExists(
	ctx context.Context,
	deps Dependencies,
	projectID, datasetID string,
) (bool, error) {
	if deps.Catalog == nil {
		return true, nil
	}

	listReq := &enginepb.ListDatasetsRequest{ProjectId: projectID}
	listed, err := deps.Catalog.ListDatasets(ctx, listReq)
	if err != nil {
		return false, err
	}

	found := false
	for _, ref := range listed.GetDatasets() {
		if ref.GetDatasetId() == datasetID {
			found = true
			break
		}
	}
	return found, nil
}
// writeDatasetNotFound writes the canonical BigQuery REST 404 for a missing
// dataset resource, naming it as "<project>:<dataset>".
func writeDatasetNotFound(w http.ResponseWriter, projectID, datasetID string) {
	message := fmt.Sprintf("Not found: Dataset %s:%s", projectID, datasetID)
	writeError(w, http.StatusNotFound, reasonNotFound, message)
}
package handlers
import (
"fmt"
"net/http"
"strings"
)
// headerEmulatorAPIRegion is the request header third-party harnesses send
// when the Node/Go client uses a regional Google API hostname while the TCP
// connection targets loopback. See third_party/node-bigquery-tests/
// test/setup.js and third_party/golang-bigquery-tests/bqopts.
const headerEmulatorAPIRegion = "X-BigQuery-Emulator-Api-Region"
// datasetMultiRegions is the small set of multi-region locations that
// upstream samples exercise, keyed by their upper-case name. See
// docs/bigquery/docs/locations.md.
var datasetMultiRegions = map[string]struct{}{
"US": {},
"EU": {},
}
// datasetRegions is a subset of supported single regions, keyed by their
// lower-case name, wide enough for third-party samples (us-east4,
// us-central1, ...). Multi-regions such as US and EU are kept separately in
// datasetMultiRegions.
var datasetRegions = map[string]struct{}{
"africa-south1": {},
"asia-east1": {},
"asia-east2": {},
"asia-northeast1": {},
"asia-northeast2": {},
"asia-northeast3": {},
"asia-south1": {},
"asia-south2": {},
"asia-southeast1": {},
"asia-southeast2": {},
"australia-southeast1": {},
"australia-southeast2": {},
"europe-central2": {},
"europe-north1": {},
"europe-southwest1": {},
"europe-west1": {},
"europe-west10": {},
"europe-west12": {},
"europe-west2": {},
"europe-west3": {},
"europe-west4": {},
"europe-west6": {},
"europe-west8": {},
"europe-west9": {},
"me-central1": {},
"me-central2": {},
"me-west1": {},
"northamerica-northeast1": {},
"northamerica-northeast2": {},
"southamerica-east1": {},
"southamerica-west1": {},
"us-central1": {},
"us-east1": {},
"us-east4": {},
"us-east5": {},
"us-south1": {},
"us-west1": {},
"us-west2": {},
"us-west3": {},
"us-west4": {},
}
// emulatorAPIRegion extracts the region advertised through the
// headerEmulatorAPIRegion request header. The value is stripped of
// surrounding whitespace and lower-cased; a missing header yields "".
func emulatorAPIRegion(r *http.Request) string {
	raw := r.Header.Get(headerEmulatorAPIRegion)
	trimmed := strings.TrimSpace(raw)
	return strings.ToLower(trimmed)
}
// normalizeDatasetLocation maps a user-supplied dataset location onto
// its canonical spelling:
//
//   - empty or whitespace-only input defaults to "US";
//   - a multi-region listed in datasetMultiRegions is returned
//     upper-cased (matching is case-insensitive);
//   - a single region listed in datasetRegions is returned lower-cased
//     (matching is case-insensitive);
//   - anything else yields "" to signal an unrecognized location.
func normalizeDatasetLocation(location string) string {
	trimmed := strings.TrimSpace(location)
	if trimmed == "" {
		return "US"
	}

	// Multi-region names are at most three characters long, so longer
	// inputs skip the multi-region lookup entirely.
	upper := strings.ToUpper(trimmed)
	if len(upper) <= 3 {
		if _, isMulti := datasetMultiRegions[upper]; isMulti {
			return upper
		}
	}

	region := strings.ToLower(trimmed)
	if _, isRegion := datasetRegions[region]; !isRegion {
		return ""
	}
	return region
}
// locationMatchesAPIRegion enforces regional-endpoint parity exercised
// by node-bigquery-tests `should fail to create a dataset using a
// different region from the client endpoint`.
//
// normalizedLocation is the canonical dataset location (as produced by
// normalizeDatasetLocation) and apiRegion is the region the client
// endpoint advertised. It returns true when no API region was
// advertised (apiRegion == "") or when the two names are equal under
// case-insensitive comparison.
func locationMatchesAPIRegion(normalizedLocation, apiRegion string) bool {
	if apiRegion == "" {
		return true
	}
	// The case-insensitive comparison also covers multi-region
	// endpoints such as eu-bigquery.googleapis.com (apiRegion "eu")
	// against location "EU", so no dedicated EU branch is needed.
	return strings.EqualFold(normalizedLocation, apiRegion)
}
// validateDatasetLocation vets a dataset location before any engine RPC
// is issued, so that an invalid region surfaces as "Invalid storage
// region" ahead of duplicate-id errors from RegisterDataset.
//
// It returns a non-nil error in two situations:
//
//   - normalizeDatasetLocation does not recognize the location;
//   - the request advertises an API region (see emulatorAPIRegion)
//     that the normalized location does not match.
//
// The error text echoes the trimmed location as the caller wrote it,
// falling back to the default/normalized name when the caller sent
// nothing.
func validateDatasetLocation(r *http.Request, location string) error {
	asWritten := strings.TrimSpace(location)
	canonical := normalizeDatasetLocation(location)

	if canonical == "" {
		shown := asWritten
		if shown == "" {
			shown = "US"
		}
		return fmt.Errorf("Invalid storage region: %s", shown) //nolint:staticcheck // BigQuery client error text
	}

	if locationMatchesAPIRegion(canonical, emulatorAPIRegion(r)) {
		return nil
	}

	shown := asWritten
	if shown == "" {
		shown = canonical
	}
	return fmt.Errorf("Invalid storage region: %s", shown) //nolint:staticcheck // BigQuery client error text
}
package handlers
import (
"net/http"
"strconv"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// datasetKind is the value the BigQuery REST API returns for the
// `kind` field of a Dataset resource. See
// docs/bigquery/docs/reference/rest/v2/datasets/get.md.
// datasetResource stamps it on every Dataset it builds and DatasetList
// sets it on each list entry.
const datasetKind = "bigquery#dataset"
// datasetListKind is the `kind` field for a DatasetList response. See
// docs/bigquery/docs/reference/rest/v2/datasets/list.md.
// DatasetList emits it on the response envelope.
const datasetListKind = "bigquery#datasetList"
// datasetIDFromPath reads the "projectId" and "datasetId" path values
// captured by the route pattern. The dataset segment is passed through
// splitColonOp and only its first result is kept, so a trailing
// AIP-136 custom-method suffix (e.g. ":undelete") is dropped; this lets
// DatasetCustomMethodPOST's handlers share the helper.
func datasetIDFromPath(r *http.Request) (projectID, datasetID string) {
	bareDatasetID, _ := splitColonOp(r.PathValue("datasetId"))
	return r.PathValue("projectId"), bareDatasetID
}
// nowMillis returns the current wall-clock time in the BigQuery REST
// timestamp representation: milliseconds since the Unix epoch rendered
// as a base-10 string.
func nowMillis() string {
	ms := time.Now().UnixMilli()
	return strconv.FormatInt(ms, 10)
}
// datasetResource turns ds into the Dataset resource returned on a
// successful response. Kind, ID ("projectID:datasetID") and
// DatasetReference are always overwritten; every other field supplied
// by the caller (FriendlyName, Description, ...) is preserved.
//
// Fields the caller left unset receive defaults:
//
//   - Location defaults to "US".
//   - Labels becomes an empty map rather than nil. Upstream samples call
//     `Object.entries(dataset.metadata.labels)` / `dict(dataset.labels)`
//     on the deserialized response, which raise on a null value. The
//     bqtypes.Dataset.Labels tag omits `omitempty` so the empty map
//     round-trips as `"labels":{}` on the wire.
//   - Access becomes an empty slice rather than nil. The Java BigQuery
//     client wraps `dataset.getAcl()` in `new ArrayList<>(...)`, which
//     NPEs on null; live BigQuery returns `access: []` for new datasets
//     and ACL-mutation flows like AuthorizeDatasetIT depend on that.
//   - CreationTime defaults to nowMillis(), and LastModifiedTime
//     defaults to the (possibly just-assigned) CreationTime.
func datasetResource(projectID, datasetID string, ds bqtypes.Dataset) bqtypes.Dataset {
	// Identity fields are authoritative and never taken from the caller.
	ds.Kind = datasetKind
	ds.ID = projectID + ":" + datasetID
	ds.DatasetReference = bqtypes.DatasetReference{ProjectID: projectID, DatasetID: datasetID}

	if ds.Location == "" {
		ds.Location = "US"
	}
	if ds.Labels == nil {
		ds.Labels = bqtypes.ResourceLabels{}
	}
	if ds.Access == nil {
		ds.Access = []map[string]any{}
	}

	// CreationTime must be settled first: LastModifiedTime falls back to it.
	if ds.CreationTime == "" {
		ds.CreationTime = nowMillis()
	}
	if ds.LastModifiedTime == "" {
		ds.LastModifiedTime = ds.CreationTime
	}
	return ds
}
// DatasetList implements `bigquery.datasets.list`:
//
//	GET /bigquery/v2/projects/{projectId}/datasets
//
// The handler calls Catalog.ListDatasets for the project in the path
// and wraps the result in a datasetList envelope (see
// docs/bigquery/docs/reference/rest/v2/datasets/list.md). When no
// catalog is configured it answers 200 with an empty dataset list.
//
// Each entry carries the minimal list shape: kind, id
// (projectId:datasetId), datasetReference and labels. Labels come from
// the MetadataStore overlay when one exists with non-nil labels and are
// an empty object otherwise, so client samples that call
// `Object.entries(item.metadata.labels)` on each item do not raise
// (mirrors TestDatasetGetLabelsIsEmptyObjectNotNull).
//
// Pagination: no `nextPageToken` is emitted today; every dataset the
// engine returns is sent in a single response. Should large catalogs
// need paging, a token can be derived by keying on dataset_id and
// slicing the response here.
func DatasetList(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if deps.Catalog == nil {
			writeJSON(w, http.StatusOK, map[string]any{
				resourceKeyKind:     datasetListKind,
				resourceKeyDatasets: []bqtypes.Dataset{},
			})
			return
		}

		req := &enginepb.ListDatasetsRequest{ProjectId: r.PathValue("projectId")}
		resp, err := deps.Catalog.ListDatasets(r.Context(), req)
		if grpcToHTTPError(w, err) {
			return
		}

		refs := resp.GetDatasets()
		entries := make([]map[string]any, 0, len(refs))
		for _, ref := range refs {
			project, dataset := ref.GetProjectId(), ref.GetDatasetId()

			// Prefer labels stashed by a prior insert/patch/update;
			// otherwise emit an empty (non-null) labels object.
			labels := bqtypes.ResourceLabels{}
			stored, found := deps.Metadata.GetDataset(project, dataset)
			if found && stored.Labels != nil {
				labels = stored.Labels
			}

			entries = append(entries, map[string]any{
				"kind": datasetKind,
				"id":   project + ":" + dataset,
				"datasetReference": bqtypes.DatasetReference{
					ProjectID: project,
					DatasetID: dataset,
				},
				"labels": labels,
			})
		}

		writeJSON(w, http.StatusOK, map[string]any{
			resourceKeyKind:     datasetListKind,
			resourceKeyDatasets: entries,
		})
	}
}
// DatasetInsert implements `bigquery.datasets.insert`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets
//
// The request body is decoded as a Dataset. Its
// `datasetReference.datasetId` is mandatory, while the project ID is
// always taken from the URL because the upstream API treats the path's
// projectId as authoritative when both are set.
//
// Checks run in this order, each short-circuiting the request:
// missing dataset ID (400), invalid location (400), unsupported
// dataset posture, missing catalog (NotImplemented). The dataset is
// then registered with the engine via Catalog.RegisterDataset, the
// body is stored in the MetadataStore, and the stamped Dataset
// resource is returned with 200.
func DatasetInsert(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		body, decoded := decodeDatasetBody(w, r)
		if !decoded {
			return
		}

		projectID := r.PathValue("projectId")
		datasetID := body.DatasetReference.DatasetID
		if datasetID == "" {
			writeError(w, http.StatusBadRequest, "invalid",
				"datasetReference.datasetId is required")
			return
		}

		// Location is validated before the engine is contacted so a bad
		// region wins over a duplicate-id error.
		if locErr := validateDatasetLocation(r, body.Location); locErr != nil {
			writeError(w, http.StatusBadRequest, "invalid", locErr.Error())
			return
		}
		if rejectUnsupportedDatasetPosture(w, &body) {
			return
		}
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}

		// NOTE(review): the location is forwarded and stored exactly as
		// the client wrote it, not in the normalized form computed during
		// validation — confirm that is intended.
		registration := &enginepb.RegisterDatasetRequest{
			Dataset: &enginepb.DatasetRef{
				ProjectId: projectID,
				DatasetId: datasetID,
			},
			Location: body.Location,
		}
		if _, err := deps.Catalog.RegisterDataset(r.Context(), registration); grpcToHTTPError(w, err) {
			return
		}

		deps.Metadata.PutDataset(projectID, datasetID, body)
		writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, body))
	}
}
// DatasetGet implements `bigquery.datasets.get`:
//
//	GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// The Catalog gRPC service does not yet expose a Get RPC (only
// Register/Drop/List), so existence is established through
// catalogDatasetExists and the Dataset resource is then synthesized
// from the path parameters plus any MetadataStore overlay. A dataset
// that does not exist produces the canonical 404.
//
// REST-only metadata (labels, defaultCollation, friendlyName, ...) is
// served from the in-memory MetadataStore so that a prior
// Insert/Patch/Update round-trips through GET — required by the node
// `getDatasetLabels` sample's `Object.entries(dataset.metadata.labels)`
// loop.
func DatasetGet(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID := datasetIDFromPath(r)

		found, err := catalogDatasetExists(r.Context(), deps, projectID, datasetID)
		if err != nil && grpcToHTTPError(w, err) {
			return
		}
		if !found {
			writeDatasetNotFound(w, projectID, datasetID)
			return
		}

		var ds bqtypes.Dataset
		if stored, ok := deps.Metadata.GetDataset(projectID, datasetID); ok {
			ds = applyDatasetMetadataOverlay(ds, stored)
		}
		writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, ds))
	}
}
// DatasetUpdate implements `bigquery.datasets.update`:
//
//	PUT /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// Full replacement of the Dataset metadata. The engine catalog has no
// update RPC yet, so after confirming the dataset exists (404
// otherwise) the handler stores the decoded request body in the
// in-memory MetadataStore and echoes it back as the canonical resource
// with kind/id/timestamps stamped by datasetResource. A later GET
// therefore returns the updated values.
func DatasetUpdate(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID := datasetIDFromPath(r)

		found, err := catalogDatasetExists(r.Context(), deps, projectID, datasetID)
		if err != nil && grpcToHTTPError(w, err) {
			return
		}
		if !found {
			writeDatasetNotFound(w, projectID, datasetID)
			return
		}

		replacement, decoded := decodeDatasetBody(w, r)
		if !decoded {
			return
		}
		if rejectUnsupportedDatasetPosture(w, &replacement) {
			return
		}

		deps.Metadata.PutDataset(projectID, datasetID, replacement)
		writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, replacement))
	}
}
// DatasetPatch implements `bigquery.datasets.patch`:
//
//	PATCH /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// Sparse update. It follows the same metadata-stash approach as
// DatasetUpdate so upstream `setMetadata` + `getMetadata` sequences
// round-trip the REST-only fields: the decoded body is merged into the
// MetadataStore, and the response is the body with the merged overlay
// applied on top. A dataset that does not exist produces the canonical
// 404.
func DatasetPatch(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID := datasetIDFromPath(r)

		found, err := catalogDatasetExists(r.Context(), deps, projectID, datasetID)
		if err != nil && grpcToHTTPError(w, err) {
			return
		}
		if !found {
			writeDatasetNotFound(w, projectID, datasetID)
			return
		}

		patch, decoded := decodeDatasetBody(w, r)
		if !decoded {
			return
		}
		if rejectUnsupportedDatasetPosture(w, &patch) {
			return
		}

		// Merge first, then read the merged state back for the response.
		deps.Metadata.MergeDataset(projectID, datasetID, patch)
		if merged, present := deps.Metadata.GetDataset(projectID, datasetID); present {
			patch = applyDatasetMetadataOverlay(patch, merged)
		}

		// When the patch explicitly touched labels and none remain, ask
		// the resource to omit the empty labels object on the wire.
		if patch.LabelsPatchPresent() && len(patch.Labels) == 0 {
			patch.SetOmitEmptyLabelsOnWire(true)
		}
		writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, patch))
	}
}
// DatasetDelete implements `bigquery.datasets.delete`:
//
//	DELETE /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// The documented `deleteContents` query parameter is forwarded as
// DropDatasetRequest.delete_contents; without it the engine refuses to
// drop a non-empty dataset (FailedPrecondition → 400). The dataset's
// REST metadata JSON is sent along with the drop request. On success
// the dataset's entry is removed from the MetadataStore — together
// with its tables' entries when deleteContents was set — and an empty
// JSON object is returned with 200. Without a configured catalog the
// request is answered by NotImplemented.
func DatasetDelete(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID := datasetIDFromPath(r)
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}

		purge := r.URL.Query().Get("deleteContents") == queryParamTrue
		drop := &enginepb.DropDatasetRequest{
			Dataset: &enginepb.DatasetRef{
				ProjectId: projectID,
				DatasetId: datasetID,
			},
			DeleteContents:   purge,
			RestMetadataJson: deps.Metadata.RestMetadataJSON(projectID, datasetID),
		}
		if _, err := deps.Catalog.DropDataset(r.Context(), drop); grpcToHTTPError(w, err) {
			return
		}

		deps.Metadata.DeleteDataset(projectID, datasetID)
		if purge {
			deps.Metadata.DeleteTablesInDataset(projectID, datasetID)
		}
		writeJSON(w, http.StatusOK, struct{}{})
	}
}
// DatasetUndelete implements `bigquery.datasets.undelete`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}:undelete
//
// Reached via DatasetCustomMethodPOST after the trailing :op has been
// parsed. The handler calls Catalog.UndeleteDataset; when the response
// carries REST metadata JSON it is restored into the MetadataStore.
// The reply is the stored dataset metadata, or a bare dataset located
// in "US" when nothing is stored, stamped by datasetResource. Without
// a configured catalog the request is answered by NotImplemented.
func DatasetUndelete(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID := datasetIDFromPath(r)
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}

		req := &enginepb.UndeleteDatasetRequest{
			Dataset: &enginepb.DatasetRef{
				ProjectId: projectID,
				DatasetId: datasetID,
			},
		}
		resp, err := deps.Catalog.UndeleteDataset(r.Context(), req)
		if grpcToHTTPError(w, err) {
			return
		}

		if resp != nil {
			if saved := resp.GetRestMetadataJson(); saved != "" {
				deps.Metadata.RestoreDatasetRestMetadataJSON(projectID, datasetID, saved)
			}
		}

		restored, found := deps.Metadata.GetDataset(projectID, datasetID)
		if !found {
			restored = bqtypes.Dataset{Location: "US"}
		}
		writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, restored))
	}
}
// DatasetCustomMethodPOST dispatches the AIP-136 custom-method POST
// endpoints registered against `/datasets/{datasetId}` (Go's mux cannot
// match the `:op` suffix directly). The operation name is the second
// result of splitColonOp on the "datasetId" path value:
//
//   - "undelete" is routed to DatasetUndelete;
//   - an empty operation (plain POST on a dataset) yields 405;
//   - any other operation yields 404.
//
// `datasets.undelete` is the only custom method today; future BigQuery
// additions can be added here.
func DatasetCustomMethodPOST(deps Dependencies) http.HandlerFunc {
	// Built once so every request reuses the same undelete handler.
	undelete := DatasetUndelete(deps)

	return func(w http.ResponseWriter, r *http.Request) {
		_, op := splitColonOp(r.PathValue("datasetId"))

		if op == "undelete" {
			undelete(w, r)
			return
		}
		if op == "" {
			writeError(w, http.StatusMethodNotAllowed, "invalid",
				"POST is not allowed on a dataset resource. "+
					"Use POST /datasets to create, or a documented :op "+
					"custom method (e.g. :undelete).")
			return
		}
		writeError(w, http.StatusNotFound, "notFound",
			"Unknown dataset custom method ':"+op+"'.")
	}
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datatransfer
import (
"fmt"
"net/url"
"strings"
)
// authTypeOAuth is the BigQuery DataTransfer `authorizationType` value
// for connectors that authenticate via OAuth. Hoisted to a package
// const so the (otherwise repetitive) catalog entries below all
// reference the same source of truth. oauthStubEntry assigns it to
// every third-party stub it builds.
const authTypeOAuth = "AUTHORIZATION_TYPE_OAUTH"
// DataSourceCatalogEntry describes a connector surfaced in the
// dataSources list/get responses. AuthorizationURLPlaceholder is
// emitted as JSON `authorizationUrl` when non-empty (an inert .invalid
// host; the emulator does not perform real OAuth or third-party
// traffic).
type DataSourceCatalogEntry struct {
// DataSourceID identifies the connector; it is the last segment of
// the resource name and the merge key in mergedCatalogEntries.
DataSourceID string
// DisplayName and Description are copied verbatim into the resource.
DisplayName string
Description string
// AuthorizationType is copied verbatim into `authorizationType`.
AuthorizationType string
// DefaultDataRefreshIntervalDays is copied verbatim into the resource.
DefaultDataRefreshIntervalDays int32
// AuthorizationURLPlaceholder is either a literal URL or, when it
// contains '%', a fmt template expanded with (project, location,
// dataSourceID) by buildAuthorizationPlaceholder. The result is only
// emitted when it parses as a URL with both a scheme and a host.
AuthorizationURLPlaceholder string
}
// buildAuthorizationPlaceholder renders the authorization URL for a
// catalog entry.
//
//   - An empty or whitespace-only template yields "".
//   - A template without any '%' is returned unchanged.
//   - Otherwise the template is treated as a fmt format string and
//     expanded with project, location and dataSourceID, in that
//     argument order (so %[1]s is the project, %[2]s the location and
//     %[3]s the data source ID).
func buildAuthorizationPlaceholder(template, project, location, dataSourceID string) string {
	switch {
	case strings.TrimSpace(template) == "":
		return ""
	case !strings.Contains(template, "%"):
		return template
	default:
		return fmt.Sprintf(template, project, location, dataSourceID)
	}
}
// Built-in catalog overview (see builtinDataSourceCatalog): the
// emulator surfaces `scheduled_query` by default as the canonical
// SQL-execution surface (the SQL runner follow-up will wire a
// Runner). `amazon_s3` satisfies CreateAmazonS3TransferIT's catalog
// probe even though no transfer is actually performed — the IT only
// asserts the create returned a name. The remaining entries cover
// the third-party `Create*Transfer.java` driver classes; each is a
// metadata-only stub (no transfer execution; no third-party traffic).
//
// All third-party rows use the same inert .invalid authorization-URL
// placeholder so `GET .../dataSources/{id}` returns a deterministic
// `authorizationUrl` without ever performing OAuth.

// oauthThirdPartyStubs lists the metadata-only third-party
// connectors the emulator advertises in its dataSources catalog.
// Each row maps directly onto a Create*Transfer.java IT driver class
// (Amazon S3 / Google Ad Manager / Google Ads / Campaign Manager /
// Google Play / Amazon Redshift / Teradata / YouTube Channel /
// YouTube Content Owner). The catalog only carries metadata; no
// transfer execution and no third-party traffic happens.
//
// Each row is (ID, Display, Desc); builtinDataSourceCatalog turns the
// rows into catalog entries, in table order, via oauthStubEntry.
var oauthThirdPartyStubs = []struct {
ID, Display, Desc string
}{
{
dataSourceAmazonS3,
"Amazon S3 (emulator catalog stub)",
"Metadata-only stub for third-party connector discovery; transfer execution and credential validation are not implemented.",
},
{
dataSourceAdManager,
"Google Ad Manager (emulator catalog stub)",
"Metadata-only stub for the dfp_dt connector used by CreateAdManagerTransfer; transfer execution is not implemented.",
},
{
dataSourceGoogleAds,
"Google Ads (emulator catalog stub)",
"Metadata-only stub for the adwords connector used by CreateAdsTransfer; transfer execution is not implemented.",
},
{
dataSourceCampaignManager,
"Campaign Manager (emulator catalog stub)",
"Metadata-only stub for the dcm_dt connector used by CreateCampaignmanagerTransfer; transfer execution is not implemented.",
},
{
dataSourcePlay,
"Google Play (emulator catalog stub)",
"Metadata-only stub for the play connector used by CreatePlayTransfer; transfer execution is not implemented.",
},
{
dataSourceRedshift,
"Amazon Redshift (emulator catalog stub)",
"Metadata-only stub for the redshift connector used by CreateRedshiftTransfer; transfer execution and credential validation are not implemented.",
},
{
dataSourceOnPremises,
"Teradata / on-premises (emulator catalog stub)",
"Metadata-only stub for the on_premises connector used by CreateTeradataTransfer; transfer execution and Teradata agent integration are not implemented.",
},
{
dataSourceYoutubeChannel,
"YouTube Channel (emulator catalog stub)",
"Metadata-only stub for the youtube_channel connector used by CreateYoutubeChannelTransfer; transfer execution is not implemented.",
},
{
dataSourceYoutubeContentOwner,
"YouTube Content Owner (emulator catalog stub)",
"Metadata-only stub for the youtube_content_owner connector used by CreateYoutubeContentOwnerTransfer; transfer execution is not implemented.",
},
}
// oauthAuthorizationURLPlaceholder is the inert .invalid URL the
// catalog emits as `authorizationUrl` for every OAuth third-party
// stub. Lifted to a package const so the test fixtures and the
// catalog-builder share the same source of truth.
//
// It is a fmt template with explicit argument indexes, expanded by
// buildAuthorizationPlaceholder: %[1]s is the project, %[2]s the
// location and %[3]s the data source ID.
const oauthAuthorizationURLPlaceholder = "https://oauth-emulator.invalid/authorize?response_type=code&client_id=emulator-not-configured&data_source_id=%[3]s&project=%[1]s&location=%[2]s"
// builtinDataSourceCatalog returns the connectors the emulator
// surfaces by default, as a freshly built slice: the `scheduled_query`
// entry first, followed by one OAuth stub entry per oauthThirdPartyStubs
// row, in table order.
func builtinDataSourceCatalog() []DataSourceCatalogEntry {
	catalog := make([]DataSourceCatalogEntry, 0, 1+len(oauthThirdPartyStubs))

	catalog = append(catalog, DataSourceCatalogEntry{
		DataSourceID:      dataSourceScheduledQuery,
		DisplayName:       "Scheduled Query (emulator)",
		Description:       "Runs BigQuery SQL on demand via startManualRuns or POST .../runs when a ScheduledQueryRunner is wired; no cron or third-party I/O.",
		AuthorizationType: "AUTHORIZATION_TYPE_GOOGLE_PLUS_AUTHORIZATION_CODE",
	})

	for i := range oauthThirdPartyStubs {
		stub := oauthThirdPartyStubs[i]
		catalog = append(catalog, oauthStubEntry(stub.ID, stub.Display, stub.Desc))
	}
	return catalog
}
// oauthStubEntry assembles the catalog entry for a metadata-only OAuth
// third-party connector. Only the ID, display name and description
// vary per connector; the authorization type (authTypeOAuth), the
// one-day default refresh interval and the inert .invalid
// authorization URL placeholder are identical for every stub.
func oauthStubEntry(id, display, desc string) DataSourceCatalogEntry {
	var entry DataSourceCatalogEntry

	// Per-connector fields.
	entry.DataSourceID = id
	entry.DisplayName = display
	entry.Description = desc

	// Fields shared by every OAuth stub.
	entry.AuthorizationType = authTypeOAuth
	entry.DefaultDataRefreshIntervalDays = 1
	entry.AuthorizationURLPlaceholder = oauthAuthorizationURLPlaceholder
	return entry
}
// mergedCatalogEntries returns the built-in catalog combined with
// h.DataSourceCatalogExtras. With a nil handler or no extras the
// built-in catalog is returned as is.
//
// Otherwise IDs are whitespace-trimmed and entries with an empty ID
// are dropped. An extra whose ID matches an earlier entry replaces
// that entry in place (extras win); an extra with a new ID is appended
// after everything seen so far.
func (h *Handler) mergedCatalogEntries() []DataSourceCatalogEntry {
	builtin := builtinDataSourceCatalog()
	if h == nil || len(h.DataSourceCatalogExtras) == 0 {
		return builtin
	}
	extras := h.DataSourceCatalogExtras

	entries := make(map[string]DataSourceCatalogEntry)
	ids := make([]string, 0, len(builtin)+len(extras))

	for _, entry := range builtin {
		entry.DataSourceID = strings.TrimSpace(entry.DataSourceID)
		if entry.DataSourceID == "" {
			continue
		}
		entries[entry.DataSourceID] = entry
		ids = append(ids, entry.DataSourceID)
	}

	for _, entry := range extras {
		entry.DataSourceID = strings.TrimSpace(entry.DataSourceID)
		if entry.DataSourceID == "" {
			continue
		}
		// Only first sightings extend the ordering; overrides keep the
		// position of the entry they replace.
		if _, seen := entries[entry.DataSourceID]; !seen {
			ids = append(ids, entry.DataSourceID)
		}
		entries[entry.DataSourceID] = entry
	}

	merged := make([]DataSourceCatalogEntry, len(ids))
	for i, id := range ids {
		merged[i] = entries[id]
	}
	return merged
}
// catalogEntryByID looks up a single entry of the merged catalog by its
// data source ID. The ID is whitespace-trimmed before matching; an
// empty ID or an ID with no matching entry yields the zero entry and
// false.
func (h *Handler) catalogEntryByID(id string) (DataSourceCatalogEntry, bool) {
	want := strings.TrimSpace(id)
	if want != "" {
		entries := h.mergedCatalogEntries()
		for i := range entries {
			if entries[i].DataSourceID == want {
				return entries[i], true
			}
		}
	}
	return DataSourceCatalogEntry{}, false
}
// dataSourceResource converts a catalog entry into the JSON resource
// for the given project and location. The resource name has the form
// "projects/<project>/locations/<location>/dataSources/<id>".
//
// AuthorizationURL is populated only when the entry's placeholder
// renders to a non-empty string that parses as a URL with both a
// scheme and a host; anything else (including relative URLs) is
// silently left out of the resource.
func (h *Handler) dataSourceResource(project, location string, e DataSourceCatalogEntry) dataSourceResource {
	res := dataSourceResource{
		Name:                           fmt.Sprintf("projects/%s/locations/%s/dataSources/%s", project, location, e.DataSourceID),
		DataSourceID:                   e.DataSourceID,
		DisplayName:                    e.DisplayName,
		Description:                    e.Description,
		AuthorizationType:              e.AuthorizationType,
		DefaultDataRefreshIntervalDays: e.DefaultDataRefreshIntervalDays,
	}

	authURL := buildAuthorizationPlaceholder(e.AuthorizationURLPlaceholder, project, location, e.DataSourceID)
	if authURL == "" {
		return res
	}

	// Guard: only absolute URLs are emitted, so a malformed or bare
	// placeholder value never reaches the response.
	parsed, err := url.Parse(authURL)
	if err != nil || parsed.Scheme == "" || parsed.Host == "" {
		return res
	}
	res.AuthorizationURL = authURL
	return res
}
// Package datatransfer implements a minimal BigQuery Data Transfer Service
// REST shell on the emulator's HTTP mux: dataSources catalog, transferConfigs
// CRUD (in-memory), transferRuns CRUD, and the AIP-136 custom methods
// (`scheduleRuns`, `checkValidCreds`, `startManualRuns`).
//
// Shallow-emulator port per docs/ENGINE_POLICY.md.
// The apiregion.CheckHTTP location-mismatch gate is intentionally dropped
// here — this repo's REST surface does not yet surface regional endpoints
// and the emulator's docker-compose listener is always loopback. Logging
// goes through `log/slog.New(slog.DiscardHandler)` when the caller does
// not provide one.
//
// The package is wired by `gateway/server.go` via `(*Handler).Register(mux)`.
// Routes registered:
//
// GET /v1/projects/{projectId}/locations/{location}/dataSources
// GET /v1/projects/{projectId}/locations/{location}/dataSources/{dataSourceId}
// GET /v1/projects/{projectId}/transferConfigs
// POST /v1/projects/{projectId}/transferConfigs
// GET /v1/projects/{projectId}/locations/{location}/transferConfigs
// POST /v1/projects/{projectId}/locations/{location}/transferConfigs
// GET /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}
// PATCH /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}
// DELETE /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}
// POST /v1/projects/{projectId}/locations/{location}/transferConfigs/{configSeg}
// (AIP-136 custom methods: :scheduleRuns, :checkValidCreds, :startManualRuns)
// GET /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}/runs
// POST /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}/runs
// GET /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}/runs/{runId}
package datatransfer
import (
"crypto/rand"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"log/slog"
"net/http"
"slices"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
)
// transferStateSucceeded is the JSON state string for a completed
// transfer config or run.
const transferStateSucceeded = "SUCCEEDED"
// transferStateFailed is the JSON state string for a failed transfer
// run.
const transferStateFailed = "FAILED"
// dataSourceScheduledQuery is the dataSourceId for the scheduled SQL
// connector. The emulator only executes this surface when a Runner is
// wired (the SQL runner follow-up). It is the first entry that
// builtinDataSourceCatalog returns.
const dataSourceScheduledQuery = "scheduled_query"
// dataSourceAmazonS3 is a metadata-only stub for third-party
// connector discovery (the failing-IT baseline row 13:
// CreateAmazonS3TransferIT). It is the first row of
// oauthThirdPartyStubs.
const dataSourceAmazonS3 = "amazon_s3"
// The following dataSourceId constants are the connector identifiers
// the upstream `Create*Transfer.java` driver classes send on
// CreateTransferConfig. They are metadata-only stubs; the emulator
// does not perform any third-party traffic. The connector IDs come
// directly from the snippet drivers (see e.g. CreateAdManagerTransfer
// → `dfp_dt`, CreateAdsTransfer → `adwords`, CreateTeradataTransfer →
// `on_premises`). The earlier shallow-emulator design listed three
// IDs that diverge from what the drivers send (`admanager_transfer`,
// `google_ads`, `teradata`); registering the driver-side IDs is what
// actually moves CreateTransferConfig forward, so we follow the
// drivers here.
//
// Each constant is referenced by one row of oauthThirdPartyStubs.
const (
dataSourceAdManager = "dfp_dt"
dataSourceGoogleAds = "adwords"
dataSourceCampaignManager = "dcm_dt"
dataSourcePlay = "play"
dataSourceRedshift = "redshift"
dataSourceOnPremises = "on_premises"
dataSourceYoutubeChannel = "youtube_channel"
dataSourceYoutubeContentOwner = "youtube_content_owner"
)
// transferRunErrorMessageKey is the single key of the map produced by
// transferRunErrorPayload.
const transferRunErrorMessageKey = "message"

// transferRunErrorPayload wraps msg in a one-entry map keyed by
// transferRunErrorMessageKey.
func transferRunErrorPayload(msg string) map[string]any {
	payload := make(map[string]any, 1)
	payload[transferRunErrorMessageKey] = msg
	return payload
}
// ScheduledQueryRunner executes scheduled_query transfer SQL against
// the emulator catalog. The shallow-emulator port keeps this as a
// hook the gateway can fill in once `gateway/handlers/queries.go` is
// reachable from the gRPC-free unit-test path; left nil for now (no
// SQL execution).
type ScheduledQueryRunner interface {
// RunScheduledQueryTransfer runs sql for the given project and
// location and reports failure through the returned error.
// NOTE(review): defaultDatasetID presumably resolves unqualified
// table names in sql — confirm against the implementation.
RunScheduledQueryTransfer(project, location, sql, defaultDatasetID string) error
}
// Handler stores transfer config and run metadata in memory.
//
// Build it with NewHandler, which allocates the configs and runs maps;
// the zero value leaves both maps nil. The struct embeds a sync.Mutex
// and an atomic counter by value, so it must be used through a
// pointer and never copied.
type Handler struct {
// Log receives the package's log output. When nil, logger() falls
// back to a logger that discards everything.
Log *slog.Logger
// Runner optional; when set, scheduled_query manual runs and run
// inserts execute SQL locally.
Runner ScheduledQueryRunner
// DataSourceCatalogExtras are merged into the built-in dataSources
// catalog (same dataSourceId: extras win).
DataSourceCatalogExtras []DataSourceCatalogEntry
// NOTE(review): mu presumably guards configs and runs, and nextRunID
// presumably feeds run identifiers — confirm against the handlers
// that use them.
mu sync.Mutex
nextRunID atomic.Uint64
configs map[string]*transferConfigResource
runs map[string]*transferRunResource
}
// NewHandler returns an empty in-memory transfer service shell that
// logs to log. The config and run stores start out allocated and
// empty; log may be nil, in which case logger() supplies a discarding
// logger.
func NewHandler(log *slog.Logger) *Handler {
	h := new(Handler)
	h.Log = log
	h.configs = map[string]*transferConfigResource{}
	h.runs = map[string]*transferRunResource{}
	return h
}
// logger returns the handler's configured logger. A nil handler or a
// nil Log field yields a fresh logger backed by slog.DiscardHandler, so
// callers never have to nil-check the result.
func (h *Handler) logger() *slog.Logger {
	if h == nil || h.Log == nil {
		return slog.New(slog.DiscardHandler)
	}
	return h.Log
}
// Register wires the v1 dataSource, transfer config and transfer run
// routes into the caller's mux. The path shape matches the upstream
// BigQuery Data Transfer API (the gapic clients construct paths under
// `/v1/...`).
//
// Transfer configs are reachable both under
// `/v1/projects/{projectId}/locations/{location}` and, for list and
// create only, directly under `/v1/projects/{projectId}`: gapic Go
// REST clients construct parent="projects/{p}" for those two calls.
func (h *Handler) Register(mux *http.ServeMux) {
	const (
		locationPrefix = "/v1/projects/{projectId}/locations/{location}"
		projectConfigs = "/v1/projects/{projectId}/transferConfigs"
		configs        = locationPrefix + "/transferConfigs"
		runs           = configs + "/{configId}/runs"
	)

	routes := []struct {
		method  string
		path    string
		handler func(http.ResponseWriter, *http.Request)
	}{
		// Data source catalog.
		{http.MethodGet, locationPrefix + "/dataSources", h.handleListDataSources},
		{http.MethodGet, locationPrefix + "/dataSources/{dataSourceId}", h.handleGetDataSource},

		// Project-scoped (no /locations/) transfer configs.
		{http.MethodGet, projectConfigs, h.handleListConfigsProjectScoped},
		{http.MethodPost, projectConfigs, h.handleCreateConfigProjectScoped},

		// Location-scoped transfer configs.
		{http.MethodGet, configs, h.handleListConfigs},
		{http.MethodPost, configs, h.handleCreateConfig},
		{http.MethodGet, configs + "/{configId}", h.handleGetConfig},
		{http.MethodPatch, configs + "/{configId}", h.handlePatchConfig},
		{http.MethodDelete, configs + "/{configId}", h.handleDeleteConfig},
		{http.MethodPost, configs + "/{configSeg}", h.handleConfigPostSegment},

		// Transfer runs.
		{http.MethodGet, runs, h.handleListRuns},
		{http.MethodPost, runs, h.handleCreateRun},
		{http.MethodGet, runs + "/{runId}", h.handleGetRun},
	}

	for _, route := range routes {
		mux.HandleFunc(route.method+" "+route.path, route.handler)
	}
}
// transferConfigResource is the JSON-on-wire shape for a single
// transfer config. Mirrors the proto3 field names the upstream gapic
// clients emit (camelCase). Disabled is *bool (not bool) so the patch
// path can distinguish "not in mask" from "set to false" — that is the
// fix the failing-IT rows 14 (DisableTransferConfigIT) and 15
// (ReEnableTransferConfigIT) exercise.
//
// Every field is tagged `omitempty`, so zero values (and a nil
// Disabled pointer) are left out of the encoded JSON.
type transferConfigResource struct {
Name string `json:"name,omitempty"`
DisplayName string `json:"displayName,omitempty"`
DataSourceID string `json:"dataSourceId,omitempty"`
Schedule string `json:"schedule,omitempty"`
Params map[string]any `json:"params,omitempty"`
State string `json:"state,omitempty"`
// Disabled is a pointer so "absent" and "false" stay distinguishable.
Disabled *bool `json:"disabled,omitempty"`
CreateTime string `json:"createTime,omitempty"`
NextRunTime string `json:"nextRunTime,omitempty"`
UserID int64 `json:"userId,omitempty"`
DatasetRegion string `json:"datasetRegion,omitempty"`
DestinationDatasetID string `json:"destinationDatasetId,omitempty"`
// DestinationDataset is the nested form
// {"datasetReference": {"projectId": ..., "datasetId": ...}}.
DestinationDataset *struct {
DatasetReference *struct {
ProjectID string `json:"projectId,omitempty"`
DatasetID string `json:"datasetId,omitempty"`
} `json:"datasetReference,omitempty"`
} `json:"destinationDataset,omitempty"`
DisableAutoScheduling bool `json:"disableAutoScheduling,omitempty"`
}
// listConfigsResponse is the JSON envelope for a transfer config list.
// The transferConfigs key has no `omitempty` and is therefore always
// present; nextPageToken is omitted when empty.
type listConfigsResponse struct {
TransferConfigs []transferConfigResource `json:"transferConfigs"`
NextPageToken string `json:"nextPageToken,omitempty"`
}
// dataSourceResource is the JSON-on-wire shape of a single data source.
// It is built from a DataSourceCatalogEntry by
// (*Handler).dataSourceResource. name and dataSourceId are always
// encoded; the remaining fields are omitted when zero.
type dataSourceResource struct {
Name string `json:"name"`
DataSourceID string `json:"dataSourceId"`
DisplayName string `json:"displayName,omitempty"`
Description string `json:"description,omitempty"`
AuthorizationType string `json:"authorizationType,omitempty"`
DefaultDataRefreshIntervalDays int32 `json:"defaultDataRefreshIntervalDays,omitempty"`
AuthorizationURL string `json:"authorizationUrl,omitempty"`
}
// listDataSourcesResponse is the JSON envelope for a data source list.
// The dataSources key has no `omitempty` and is therefore always
// present; nextPageToken is omitted when empty.
type listDataSourcesResponse struct {
DataSources []dataSourceResource `json:"dataSources"`
NextPageToken string `json:"nextPageToken,omitempty"`
}
// transferRunResource is the JSON-on-wire shape of a single transfer
// run. name is always encoded; every other field is tagged `omitempty`
// and dropped when zero.
type transferRunResource struct {
Name string `json:"name"`
State string `json:"state,omitempty"`
// Errors holds free-form error payloads.
// NOTE(review): presumably populated with transferRunErrorPayload
// maps — confirm against the run handlers.
Errors []any `json:"errors,omitempty"`
ScheduleTime string `json:"scheduleTime,omitempty"`
RunTime string `json:"runTime,omitempty"`
UpdateTime string `json:"updateTime,omitempty"`
DataSourceID string `json:"dataSourceId,omitempty"`
Params map[string]any `json:"params,omitempty"`
DatasetRegion string `json:"datasetRegion,omitempty"`
// DestinationDataset is the nested form
// {"datasetReference": {"projectId": ..., "datasetId": ...}}.
DestinationDataset *struct {
DatasetReference *struct {
ProjectID string `json:"projectId,omitempty"`
DatasetID string `json:"datasetId,omitempty"`
} `json:"datasetReference,omitempty"`
} `json:"destinationDataset,omitempty"`
}
// configName builds the resource name of a transfer config:
// "projects/<project>/locations/<location>/transferConfigs/<id>".
func configName(project, location, id string) string {
	const pattern = "projects/%s/locations/%s/transferConfigs/%s"
	return fmt.Sprintf(pattern, project, location, id)
}
// runName builds the resource name of a transfer run: the owning
// config's name (see configName) followed by "/runs/<runID>".
func runName(project, location, configID, runID string) string {
	parent := configName(project, location, configID)
	return parent + "/runs/" + runID
}
// writeAPIError logs the failure (when log is non-nil) and writes a
// Google-style JSON error envelope with the given HTTP status. The
// envelope mirrors the shape of `gateway/handlers.writeError`; it is
// duplicated here so this package does not import handlers (and the
// engine-client dependencies that come with it).
func writeAPIError(log *slog.Logger, w http.ResponseWriter, status int, msg string) {
	if log != nil {
		log.Error("datatransfer api error",
			slog.Int("status", status),
			slog.String("message", msg),
		)
	}
	// The reason string appears twice in the envelope: once as the
	// top-level status and once inside the single errors[] entry.
	reason := apiErrorReason(status)
	detail := map[string]any{
		"reason":  reason,
		"message": msg,
		"domain":  "global",
	}
	envelope := map[string]any{
		"code":    status,
		"message": msg,
		"status":  reason,
		"errors":  []map[string]any{detail},
	}
	writeJSON(log, w, status, map[string]any{"error": envelope})
}
// apiErrorReason maps an HTTP status code to the reason string placed
// in the error envelope. Statuses without a dedicated reason fall back
// by class: any other 5xx is "internalError", any other 4xx is
// "badRequest", and everything else is "unknown".
func apiErrorReason(status int) string {
	// Statuses with a dedicated reason.
	switch status {
	case http.StatusUnauthorized:
		return "unauthorized"
	case http.StatusForbidden:
		return "forbidden"
	case http.StatusNotFound:
		return "notFound"
	case http.StatusMethodNotAllowed:
		return "methodNotAllowed"
	case http.StatusConflict:
		return "alreadyExists"
	case http.StatusNotImplemented:
		return "notImplemented"
	}
	// Class-based fallback; 400 and 500 land on their own class reason.
	switch {
	case status >= 500:
		return "internalError"
	case status >= 400:
		return "badRequest"
	}
	return "unknown"
}
// writeJSON sets the JSON content type, writes the status line and
// streams v as JSON to w. An encoding failure cannot be reported to the
// client once the header is out, so it is only logged (when log is
// non-nil).
func writeJSON(log *slog.Logger, w http.ResponseWriter, status int, v any) {
	header := w.Header()
	header.Set("Content-Type", "application/json; charset=UTF-8")
	w.WriteHeader(status)

	err := json.NewEncoder(w).Encode(v)
	if err == nil || log == nil {
		return
	}
	log.Error("datatransfer: encode json response", slog.String("err", err.Error()))
}
// handleListDataSources serves the dataSources LIST endpoint. It renders
// the merged catalog for the request's project/location and returns the
// page selected by the pageSize / pageToken query parameters, with
// nextPageToken set to the following offset when more entries remain.
func (h *Handler) handleListDataSources(w http.ResponseWriter, r *http.Request) {
	query := r.URL.Query()
	all := h.mergedCatalogResources(r.PathValue("projectId"), r.PathValue("location"))

	from, to := pageWindow(len(all), query.Get("pageSize"), query.Get("pageToken"))
	resp := listDataSourcesResponse{DataSources: all[from:to]}
	if to < len(all) {
		resp.NextPageToken = strconv.Itoa(to)
	}
	writeJSON(h.logger(), w, http.StatusOK, resp)
}
// mergedCatalogResources converts every entry of the merged catalog
// into its wire representation for the given project and location. The
// result is always a non-nil slice, in the order mergedCatalogEntries
// yields the entries.
func (h *Handler) mergedCatalogResources(project, location string) []dataSourceResource {
	catalog := h.mergedCatalogEntries()
	resources := make([]dataSourceResource, 0, len(catalog))
	for _, entry := range catalog {
		resources = append(resources, h.dataSourceResource(project, location, entry))
	}
	return resources
}
// handleGetDataSource serves the dataSources GET endpoint. The
// whitespace-trimmed dataSourceId path value is looked up in the
// catalog; an unknown id yields a 404 error envelope.
func (h *Handler) handleGetDataSource(w http.ResponseWriter, r *http.Request) {
	dsID := strings.TrimSpace(r.PathValue("dataSourceId"))
	entry, found := h.catalogEntryByID(dsID)
	if found {
		res := h.dataSourceResource(r.PathValue("projectId"), r.PathValue("location"), entry)
		writeJSON(h.logger(), w, http.StatusOK, res)
		return
	}
	writeAPIError(h.logger(), w, http.StatusNotFound,
		"Not found: DataSource "+dsID)
}
// parseDataSourceIDsFilter extracts the data-source filter from the
// query values. It reads `dataSourceIds` and, only when that key has no
// values, the bracketed `dataSourceIds[]` spelling. Each value is
// whitespace-trimmed and blank values are dropped. The result is never
// nil; an empty slice means "no filter".
func parseDataSourceIDsFilter(q map[string][]string) []string {
	var raw []string
	for _, key := range [...]string{"dataSourceIds", "dataSourceIds[]"} {
		if raw = q[key]; len(raw) > 0 {
			break
		}
	}
	cleaned := make([]string, 0, len(raw))
	for _, candidate := range raw {
		trimmed := strings.TrimSpace(candidate)
		if trimmed == "" {
			continue
		}
		cleaned = append(cleaned, trimmed)
	}
	return cleaned
}
// configMatchesDataSourceFilter reports whether cfg passes the
// data-source filter. An empty filter or a nil cfg always matches;
// otherwise the config's trimmed DataSourceID must equal one of the
// filter entries exactly.
func configMatchesDataSourceFilter(cfg *transferConfigResource, filter []string) bool {
	if cfg == nil || len(filter) == 0 {
		return true
	}
	return slices.Contains(filter, strings.TrimSpace(cfg.DataSourceID))
}
// handleListConfigs serves the per-location transferConfigs LIST
// endpoint. Configs stored under
// projects/{projectId}/locations/{location}/transferConfigs/ are
// filtered by the optional dataSourceIds query filter, sorted by
// resource name, and paged with the pageSize / pageToken query
// parameters. nextPageToken is set to the following offset when more
// results remain.
//
// The matching keys, the page window and the page contents are all
// taken in a single critical section, so a concurrent create or delete
// cannot make the returned page disagree with nextPageToken. The lock
// is released before the response is encoded so a slow client cannot
// stall other handlers.
func (h *Handler) handleListConfigs(w http.ResponseWriter, r *http.Request) {
	project := r.PathValue("projectId")
	location := r.PathValue("location")
	prefix := fmt.Sprintf("projects/%s/locations/%s/transferConfigs/", project, location)
	query := r.URL.Query()
	dsFilter := parseDataSourceIDsFilter(query)

	h.mu.Lock()
	keys := make([]string, 0, len(h.configs))
	for k, c := range h.configs {
		if c != nil && strings.HasPrefix(k, prefix) && configMatchesDataSourceFilter(c, dsFilter) {
			keys = append(keys, k)
		}
	}
	// Map iteration order is random; sort so integer page offsets are stable.
	sort.Strings(keys)
	start, end := pageWindow(len(keys), query.Get("pageSize"), query.Get("pageToken"))
	out := make([]transferConfigResource, 0, end-start)
	for _, k := range keys[start:end] {
		out = append(out, *h.configs[k])
	}
	total := len(keys)
	h.mu.Unlock()

	resp := listConfigsResponse{TransferConfigs: out}
	if end < total {
		resp.NextPageToken = strconv.Itoa(end)
	}
	writeJSON(h.logger(), w, http.StatusOK, resp)
}
// handleCreateConfig serves the location-scoped transferConfigs CREATE
// endpoint. A blank body is accepted and treated as an empty config;
// a body that is not valid JSON yields a 400. The decoded input is
// normalized and handed to finishCreateTransferConfig, which stores it
// and writes the response.
func (h *Handler) handleCreateConfig(w http.ResponseWriter, r *http.Request) {
	raw, readErr := io.ReadAll(r.Body)
	if readErr != nil {
		writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid body")
		return
	}
	// Best effort: the body has been fully read, a close error changes nothing.
	_ = r.Body.Close()

	var req transferConfigResource
	if strings.TrimSpace(string(raw)) != "" {
		if decodeErr := json.Unmarshal(raw, &req); decodeErr != nil {
			writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid json: "+decodeErr.Error())
			return
		}
	}

	project := r.PathValue("projectId")
	normalizeTransferConfigInput(project, &req)
	h.finishCreateTransferConfig(w, project, r.PathValue("location"), req)
}
// finishCreateTransferConfig stores a new transfer config built from
// the (already normalized) input and writes the stored record as the
// response.
//
// The config id is the last path segment of in.Name when that name
// contains a slash, otherwise a generated "tc_"-prefixed random id.
// UserID defaults to 1 when the input leaves it at zero. A name that
// already exists yields a 409 and leaves the store untouched.
//
// NOTE(review): the initial scheduled-query run is seeded while h.mu is
// held, so a wired Runner executes SQL under the handler lock.
func (h *Handler) finishCreateTransferConfig(
	w http.ResponseWriter,
	project, location string,
	in transferConfigResource,
) {
	configID := pathSuffixOrGen(in.Name, "tc_"+randomHex(16))
	fullName := configName(project, location, configID)

	owner := in.UserID
	if owner == 0 {
		owner = 1
	}

	stored := &transferConfigResource{
		Name:                  fullName,
		DisplayName:           in.DisplayName,
		DataSourceID:          in.DataSourceID,
		Schedule:              in.Schedule,
		Params:                in.Params,
		State:                 transferStateSucceeded,
		Disabled:              in.Disabled,
		CreateTime:            time.Now().UTC().Format(time.RFC3339Nano),
		NextRunTime:           strings.TrimSpace(in.NextRunTime),
		UserID:                owner,
		DatasetRegion:         strings.TrimSpace(in.DatasetRegion),
		DestinationDatasetID:  strings.TrimSpace(in.DestinationDatasetID),
		DestinationDataset:    in.DestinationDataset,
		DisableAutoScheduling: in.DisableAutoScheduling,
	}

	h.mu.Lock()
	_, exists := h.configs[fullName]
	if !exists {
		h.configs[fullName] = stored
		h.maybeSeedInitialScheduledQueryRun(project, location, configID, stored)
	}
	// Copy under the lock so the response is a consistent snapshot.
	snapshot := *stored
	h.mu.Unlock()

	if exists {
		writeAPIError(h.logger(), w, http.StatusConflict, "transfer config already exists")
		return
	}
	writeJSON(h.logger(), w, http.StatusOK, snapshot)
}
// maybeSeedInitialScheduledQueryRun records an initial run for a newly
// created scheduled-query config; configs for any other data source are
// left without runs. The query is actually executed only when a Runner
// is wired, auto scheduling is not disabled and the config has a
// non-blank schedule; invalid query params mark the seeded run FAILED.
//
// It writes h.runs without locking; the visible caller,
// finishCreateTransferConfig, invokes it with h.mu held.
func (h *Handler) maybeSeedInitialScheduledQueryRun(project, location, configID string, cfg *transferConfigResource) {
	switch {
	case h == nil, cfg == nil:
		return
	case strings.TrimSpace(cfg.DataSourceID) != dataSourceScheduledQuery:
		return
	}

	seeded := h.newTransferRun(project, location, configID, cfg)
	autoRun := h.Runner != nil && !cfg.DisableAutoScheduling && strings.TrimSpace(cfg.Schedule) != ""
	if autoRun {
		stop, _, reason := h.maybeExecuteScheduledQueryOnRun(project, location, cfg, seeded)
		if stop {
			// stop means the params were invalid; surface that on the run itself.
			seeded.State = transferStateFailed
			seeded.Errors = []any{transferRunErrorPayload(reason)}
		}
	}
	h.runs[seeded.Name] = seeded
}
// pathSuffixOrGen picks the resource id for a create request. When the
// whitespace-trimmed name contains a slash, the text after the last
// slash is the id; otherwise (blank name, or a bare name with no slash)
// fallbackID is returned.
//
// A name that ends in a slash has an empty final segment. That case
// also falls back to fallbackID, so a config is never stored under an
// empty id (a key ending in "/transferConfigs/").
func pathSuffixOrGen(name, fallbackID string) string {
	name = strings.TrimSpace(name)
	i := strings.LastIndex(name, "/")
	if i < 0 {
		return fallbackID
	}
	if suffix := name[i+1:]; suffix != "" {
		return suffix
	}
	return fallbackID
}
// randomHex returns n random bytes rendered as 2*n lowercase hex
// characters. The error from rand.Read is deliberately discarded, as in
// the rest of this file's best-effort id generation.
func randomHex(n int) string {
	raw := make([]byte, n)
	_, _ = rand.Read(raw)
	encoded := make([]byte, hex.EncodedLen(len(raw)))
	hex.Encode(encoded, raw)
	return string(encoded)
}
// handleGetConfig serves the transferConfigs GET endpoint, returning
// the stored config or a 404 envelope when it does not exist.
//
// The record is copied while h.mu is held and the lock is released
// before anything is written, matching the create handlers, so a slow
// client cannot block other requests on the handler mutex.
func (h *Handler) handleGetConfig(w http.ResponseWriter, r *http.Request) {
	project := r.PathValue("projectId")
	location := r.PathValue("location")
	id := r.PathValue("configId")
	name := configName(project, location, id)

	var out transferConfigResource
	h.mu.Lock()
	c, ok := h.configs[name]
	if ok {
		out = *c
	}
	h.mu.Unlock()

	if !ok {
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+id)
		return
	}
	writeJSON(h.logger(), w, http.StatusOK, out)
}
package datatransfer
import (
"encoding/json"
"errors"
"io"
"net/http"
"strings"
)
// handlePatchConfig serves the transferConfigs PATCH endpoint.
//
// It honors the `disabled` field on the request body; because Disabled
// is *bool, an explicit `"disabled": false` flips a disabled config
// back on (failing-IT row 15: ReEnableTransferConfigIT) and
// `"disabled": true` disables it (row 14: DisableTransferConfigIT).
// Other fields update only when non-zero.
//
// The `updateMask` query parameter that gapic REST clients append is
// not read: the mask is advisory in this shallow emulator, and the
// body's non-zero fields drive the patch. That matches the existing
// emulator pattern for other PATCH endpoints.
//
// The patch is applied and the result copied while h.mu is held; the
// lock is released before the response (or the 404) is written so a
// slow client cannot block other handlers.
func (h *Handler) handlePatchConfig(w http.ResponseWriter, r *http.Request) {
	project := r.PathValue("projectId")
	location := r.PathValue("location")
	id := r.PathValue("configId")
	name := configName(project, location, id)

	body, err := io.ReadAll(r.Body)
	if err != nil {
		writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid body")
		return
	}
	// Best effort: the body has been fully read, a close error changes nothing.
	_ = r.Body.Close()

	var patch transferConfigResource
	if err := json.Unmarshal(body, &patch); err != nil {
		writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid json: "+err.Error())
		return
	}

	h.mu.Lock()
	cur, ok := h.configs[name]
	if !ok {
		h.mu.Unlock()
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+id)
		return
	}
	if patch.DisplayName != "" {
		cur.DisplayName = patch.DisplayName
	}
	if patch.Schedule != "" {
		cur.Schedule = patch.Schedule
	}
	if patch.Params != nil {
		cur.Params = patch.Params
	}
	if patch.DatasetRegion != "" {
		cur.DatasetRegion = patch.DatasetRegion
	}
	if patch.DestinationDatasetID != "" {
		cur.DestinationDatasetID = patch.DestinationDatasetID
	}
	if patch.DestinationDataset != nil {
		cur.DestinationDataset = patch.DestinationDataset
	}
	if patch.NextRunTime != "" {
		cur.NextRunTime = patch.NextRunTime
	}
	if patch.Disabled != nil {
		cur.Disabled = patch.Disabled
	}
	out := *cur
	h.mu.Unlock()

	writeJSON(h.logger(), w, http.StatusOK, out)
}
// handleDeleteConfig serves the transferConfigs DELETE endpoint. It
// removes the config and every run stored under it, then replies 200
// with no body; an unknown config yields a 404 envelope.
func (h *Handler) handleDeleteConfig(w http.ResponseWriter, r *http.Request) {
	id := r.PathValue("configId")
	name := configName(r.PathValue("projectId"), r.PathValue("location"), id)
	runPrefix := name + "/runs/"

	h.mu.Lock()
	_, found := h.configs[name]
	if found {
		delete(h.configs, name)
		// Runs are keyed by the config name plus "/runs/{id}".
		for runKey := range h.runs {
			if strings.HasPrefix(runKey, runPrefix) {
				delete(h.runs, runKey)
			}
		}
	}
	h.mu.Unlock()

	if !found {
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+id)
		return
	}
	w.WriteHeader(http.StatusOK)
}
// readOptionalJSONProbe consumes the optional JSON body of the AIP-136
// custom methods (`:checkValidCreds`, `:startManualRuns`). At most
// 1 MiB of the body is read. A blank body is accepted; anything else
// must decode as a JSON object (or null). The decoded content is
// discarded. A nil return means the caller may proceed; a non-nil
// error carries the message to send with a 400.
func readOptionalJSONProbe(r *http.Request) error {
	const maxProbeBytes = 1 << 20
	raw, readErr := io.ReadAll(io.LimitReader(r.Body, maxProbeBytes))
	if readErr != nil {
		return errors.New("invalid body")
	}
	// Best effort: nothing more is read from the body after this point.
	_ = r.Body.Close()

	if strings.TrimSpace(string(raw)) == "" {
		return nil
	}
	var discard map[string]any
	decodeErr := json.Unmarshal(raw, &discard)
	if decodeErr == nil {
		return nil
	}
	return errors.New("invalid json: " + decodeErr.Error())
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datatransfer
import (
"fmt"
"net/http"
"sort"
"strconv"
"strings"
"time"
)
// listRunsResponse is the JSON body written by handleListRuns.
type listRunsResponse struct {
// TransferRuns carries no omitempty, so the key is always emitted.
TransferRuns []transferRunResource `json:"transferRuns"`
// NextPageToken is the integer offset of the next page rendered as a
// string; it is left empty (and so omitted) on the last page.
NextPageToken string `json:"nextPageToken,omitempty"`
}
// handleListRuns serves the runs LIST endpoint for one transfer config.
// Runs stored under the config's name are sorted by resource name and
// paged with the pageSize / pageToken query parameters; nextPageToken
// is set to the following offset when more runs remain.
//
// Key collection, paging and copying happen in one critical section so
// a concurrent delete cannot make the page disagree with nextPageToken.
// The lock is released before the response is encoded so a slow client
// cannot stall other handlers.
func (h *Handler) handleListRuns(w http.ResponseWriter, r *http.Request) {
	project := r.PathValue("projectId")
	location := r.PathValue("location")
	configID := r.PathValue("configId")
	prefix := configName(project, location, configID) + "/runs/"
	query := r.URL.Query()

	h.mu.Lock()
	var keys []string
	for k, run := range h.runs {
		if run != nil && strings.HasPrefix(k, prefix) {
			keys = append(keys, k)
		}
	}
	// Map iteration order is random; sort so integer page offsets are stable.
	sort.Strings(keys)
	start, end := pageWindow(len(keys), query.Get("pageSize"), query.Get("pageToken"))
	out := make([]transferRunResource, 0, end-start)
	for _, k := range keys[start:end] {
		out = append(out, *h.runs[k])
	}
	total := len(keys)
	h.mu.Unlock()

	resp := listRunsResponse{TransferRuns: out}
	if end < total {
		resp.NextPageToken = strconv.Itoa(end)
	}
	writeJSON(h.logger(), w, http.StatusOK, resp)
}
// unsupportedCreateRunDataSource reports whether run creation must be
// refused for the given data source. A blank id and the scheduled-query
// id are accepted ("", false); any other id returns the explanatory
// message and true.
func unsupportedCreateRunDataSource(ds string) (string, bool) {
	trimmed := strings.TrimSpace(ds)
	if trimmed == "" || trimmed == dataSourceScheduledQuery {
		return "", false
	}
	msg := fmt.Sprintf(
		"transfer run creation for data source %q is not supported by the emulator (metadata catalog only)",
		trimmed,
	)
	return msg, true
}
// newTransferRun allocates a fresh run id and builds a run for the
// given config. The run copies the config's data source, params, region
// and destination dataset, stamps all three timestamps with the current
// UTC time, and starts in the SUCCEEDED state. The run is returned
// without being stored; callers insert it into h.runs themselves.
func (h *Handler) newTransferRun(project, location, configID string, cp *transferConfigResource) *transferRunResource {
	name := runName(project, location, configID, h.allocRunID())
	stamp := time.Now().UTC().Format(time.RFC3339Nano)
	return &transferRunResource{
		Name:               name,
		State:              transferStateSucceeded,
		ScheduleTime:       stamp,
		RunTime:            stamp,
		UpdateTime:         stamp,
		DataSourceID:       cp.DataSourceID,
		Params:             cp.Params,
		DatasetRegion:      cp.DatasetRegion,
		DestinationDataset: cp.DestinationDataset,
	}
}
// maybeExecuteScheduledQueryOnRun executes the config's query through
// the wired Runner and records the outcome on run. It is a no-op when
// no Runner is configured or the config is not a scheduled query.
//
// stop=true (with an HTTP status and message) is returned only when the
// config's params are invalid. A Runner failure is not an API error: it
// turns the run into a FAILED run carrying the error payload.
func (h *Handler) maybeExecuteScheduledQueryOnRun(
	project, location string,
	cp *transferConfigResource,
	run *transferRunResource,
) (stop bool, status int, msg string) {
	isScheduledQuery := strings.TrimSpace(cp.DataSourceID) == dataSourceScheduledQuery
	if h.Runner == nil || !isScheduledQuery {
		return false, 0, ""
	}

	query, paramErr := scheduledQueryText(cp.Params)
	if paramErr != nil {
		return true, http.StatusBadRequest, paramErr.Error()
	}

	// destinationDatasetID already falls back to the trimmed flat
	// DestinationDatasetID when the nested reference is empty.
	defaultDataset := destinationDatasetID(cp)

	runErr := h.Runner.RunScheduledQueryTransfer(project, location, query, defaultDataset)
	if runErr == nil {
		run.State = transferStateSucceeded
		return false, 0, ""
	}
	run.State = transferStateFailed
	run.Errors = []any{transferRunErrorPayload(runErr.Error())}
	return false, 0, ""
}
// handleCreateRun serves the runs CREATE endpoint. It replies 404 for
// an unknown config and 501 for data sources whose runs the emulator
// cannot create. Otherwise it builds a run, executes the scheduled
// query when applicable (invalid params yield the returned 4xx), stores
// the run and returns it. The query executes on a copy of the config
// with the handler lock released.
func (h *Handler) handleCreateRun(w http.ResponseWriter, r *http.Request) {
	project := r.PathValue("projectId")
	location := r.PathValue("location")
	configID := r.PathValue("configId")

	var cp transferConfigResource
	h.mu.Lock()
	stored, found := h.configs[configName(project, location, configID)]
	if found {
		cp = *stored
	}
	h.mu.Unlock()
	if !found {
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+configID)
		return
	}

	if reason, unsupported := unsupportedCreateRunDataSource(cp.DataSourceID); unsupported {
		writeAPIError(h.logger(), w, http.StatusNotImplemented, reason)
		return
	}

	run := h.newTransferRun(project, location, configID, &cp)
	stop, code, detail := h.maybeExecuteScheduledQueryOnRun(project, location, &cp, run)
	if stop {
		writeAPIError(h.logger(), w, code, detail)
		return
	}

	h.mu.Lock()
	h.runs[run.Name] = run
	snapshot := *run
	h.mu.Unlock()
	writeJSON(h.logger(), w, http.StatusOK, snapshot)
}
// handleGetRun serves the runs GET endpoint, returning the stored run
// or a 404 envelope when it does not exist.
//
// The run is copied while h.mu is held and the lock is released before
// anything is written, matching handleCreateRun, so a slow client
// cannot block other requests on the handler mutex.
func (h *Handler) handleGetRun(w http.ResponseWriter, r *http.Request) {
	project := r.PathValue("projectId")
	location := r.PathValue("location")
	configID := r.PathValue("configId")
	runID := r.PathValue("runId")
	name := runName(project, location, configID, runID)

	var out transferRunResource
	h.mu.Lock()
	run, ok := h.runs[name]
	if ok {
		out = *run
	}
	h.mu.Unlock()

	if !ok {
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferRun "+runID)
		return
	}
	writeJSON(h.logger(), w, http.StatusOK, out)
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datatransfer
import (
"errors"
"fmt"
"net/http"
"strings"
"time"
)
// startManualTransferRunsResponse is the JSON body written by
// handleStartManualRuns; that handler always returns exactly one run.
type startManualTransferRunsResponse struct {
Runs []transferRunResource `json:"runs"`
}
// allocRunID returns the next run id, "run_<n>", where n is the value
// of the handler's nextRunID counter after incrementing it by one.
func (h *Handler) allocRunID() string {
	return fmt.Sprintf("run_%d", h.nextRunID.Add(1))
}
// scheduledQueryText extracts the SQL text of a scheduled-query config
// from params["query"]. It returns the whitespace-trimmed query, or an
// error when params is nil, the key is missing or null, the value is
// not a string, or the string is blank.
func scheduledQueryText(params map[string]any) (string, error) {
	// Indexing a nil map yields nil, so a nil params map, a missing key
	// and an explicit null all reach the first case.
	switch query := params["query"].(type) {
	case nil:
		return "", errors.New("scheduled_query transfer config requires params.query")
	case string:
		query = strings.TrimSpace(query)
		if query == "" {
			return "", errors.New("params.query must be non-empty")
		}
		return query, nil
	default:
		return "", errors.New("params.query must be a string")
	}
}
// destinationDatasetID returns the config's destination dataset id. The
// nested destinationDataset.datasetReference.datasetId wins when it is
// non-blank; otherwise the flat DestinationDatasetID is used. Both are
// whitespace-trimmed. A nil cfg yields "".
func destinationDatasetID(cfg *transferConfigResource) string {
	if cfg == nil {
		return ""
	}
	if dest := cfg.DestinationDataset; dest != nil {
		if ref := dest.DatasetReference; ref != nil {
			if nested := strings.TrimSpace(ref.DatasetID); nested != "" {
				return nested
			}
		}
	}
	return strings.TrimSpace(cfg.DestinationDatasetID)
}
// handleConfigPostSegment dispatches the AIP-136 custom-method POST
// endpoints (`{configId}:scheduleRuns`, `:checkValidCreds`,
// `:startManualRuns`). Go's net/http mux can't match a literal segment
// after a wildcard, so the parent `{configSeg}` is registered and the
// segment is split on its first `:` here. A segment without both an id
// and an action, or with an unknown action, yields a 404.
func (h *Handler) handleConfigPostSegment(w http.ResponseWriter, r *http.Request) {
	configID, action, hasAction := strings.Cut(r.PathValue("configSeg"), ":")
	if !hasAction || configID == "" || action == "" {
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found")
		return
	}

	switch action {
	case "scheduleRuns":
		writeAPIError(h.logger(), w, http.StatusNotImplemented,
			"scheduleRuns is not supported by the emulator (no backfill or cron execution)")
	case "checkValidCreds", "startManualRuns":
		// Both methods accept an optional JSON body that only has to parse.
		if err := readOptionalJSONProbe(r); err != nil {
			writeAPIError(h.logger(), w, http.StatusBadRequest, err.Error())
			return
		}
		if action == "checkValidCreds" {
			// No live OAuth or vendor credential checks; clients can
			// probe predictably.
			writeJSON(h.logger(), w, http.StatusOK, map[string]any{"hasValidCreds": false})
			return
		}
		h.handleStartManualRuns(w, r.PathValue("projectId"), r.PathValue("location"), configID)
	default:
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found")
	}
}
// handleStartManualRuns implements `:startManualRuns` for one config.
// It replies 404 for an unknown config, 501 when the config is not a
// scheduled query or no Runner is wired, and 400 when the query params
// are invalid. Otherwise it executes the query once with the handler
// lock released, stores a run whose state reflects the outcome
// (SUCCEEDED, or FAILED with the error payload) and returns that single
// run.
func (h *Handler) handleStartManualRuns(w http.ResponseWriter, project, location, configID string) {
	var cp transferConfigResource
	h.mu.Lock()
	stored, found := h.configs[configName(project, location, configID)]
	if found {
		cp = *stored
	}
	h.mu.Unlock()
	if !found {
		writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+configID)
		return
	}

	if ds := strings.TrimSpace(cp.DataSourceID); ds != dataSourceScheduledQuery {
		writeAPIError(h.logger(), w, http.StatusNotImplemented,
			fmt.Sprintf("manual runs for data source %q are not supported by the emulator", ds))
		return
	}
	if h.Runner == nil {
		writeAPIError(h.logger(), w, http.StatusNotImplemented,
			"scheduled_query execution is not configured (emulator metadata-only mode)")
		return
	}
	query, paramErr := scheduledQueryText(cp.Params)
	if paramErr != nil {
		writeAPIError(h.logger(), w, http.StatusBadRequest, paramErr.Error())
		return
	}

	// destinationDatasetID already falls back to the trimmed flat
	// DestinationDatasetID when the nested reference is empty.
	defaultDataset := destinationDatasetID(&cp)

	// newTransferRun fills the same fields the run needs here; its
	// initial state is overwritten with the real outcome below.
	run := h.newTransferRun(project, location, configID, &cp)
	if runErr := h.Runner.RunScheduledQueryTransfer(project, location, query, defaultDataset); runErr != nil {
		run.State = transferStateFailed
		run.Errors = []any{transferRunErrorPayload(runErr.Error())}
	} else {
		run.State = transferStateSucceeded
	}

	h.mu.Lock()
	h.runs[run.Name] = run
	h.mu.Unlock()

	resp := startManualTransferRunsResponse{Runs: []transferRunResource{*run}}
	writeJSON(h.logger(), w, http.StatusOK, resp)
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datatransfer
import (
"encoding/json"
"io"
"net/http"
"sort"
"strconv"
"strings"
)
// emulatorDefaultTransferLocation is the location assigned to configs
// created through the project-scoped `.../projects/{p}/transferConfigs`
// route, whose URL has no `/locations/` segment. It matches the
// location BigQuery gapic REST clients assume for that route: live
// BigQuery routes those requests to the multi-region `us`. The emulator
// stores such configs under the same location key so a per-location
// LIST for `us` still surfaces them.
const emulatorDefaultTransferLocation = "us"
// handleListConfigsProjectScoped serves the project-scoped
// transferConfigs LIST endpoint (no `/locations/` segment in the URL).
// It returns the project's configs across every location, filtered by
// the optional dataSourceIds query filter, sorted by resource name and
// paged with the pageSize / pageToken query parameters.
//
// The matching keys, the page window and the page contents are all
// taken in a single critical section, so a concurrent create or delete
// cannot make the returned page disagree with nextPageToken. The lock
// is released before the response is encoded so a slow client cannot
// stall other handlers.
func (h *Handler) handleListConfigsProjectScoped(w http.ResponseWriter, r *http.Request) {
	project := r.PathValue("projectId")
	locPrefix := "projects/" + project + "/locations/"
	query := r.URL.Query()
	dsFilter := parseDataSourceIDsFilter(query)

	h.mu.Lock()
	keys := make([]string, 0, len(h.configs))
	for k, c := range h.configs {
		if c == nil || !strings.HasPrefix(k, locPrefix) || !strings.Contains(k, "/transferConfigs/") {
			continue
		}
		if configMatchesDataSourceFilter(c, dsFilter) {
			keys = append(keys, k)
		}
	}
	// Map iteration order is random; sort so integer page offsets are stable.
	sort.Strings(keys)
	start, end := pageWindow(len(keys), query.Get("pageSize"), query.Get("pageToken"))
	out := make([]transferConfigResource, 0, end-start)
	for _, k := range keys[start:end] {
		out = append(out, *h.configs[k])
	}
	total := len(keys)
	h.mu.Unlock()

	resp := listConfigsResponse{TransferConfigs: out}
	if end < total {
		resp.NextPageToken = strconv.Itoa(end)
	}
	writeJSON(h.logger(), w, http.StatusOK, resp)
}
// normalizeTransferConfigInput reconciles the two ways gapic REST
// clients express the destination (`destinationDatasetId` and/or the
// nested `destinationDataset.datasetReference`) so the in-memory record
// carries both wire forms.
//
// A blank flat id is filled from the nested reference. If no dataset id
// can be determined the input is left as is. Otherwise the nested
// reference is created when absent, or has its blank datasetId /
// projectId filled in (projectId defaults to projectID). A nil input is
// a no-op.
func normalizeTransferConfigInput(projectID string, in *transferConfigResource) {
	// Aliases for the anonymous wire structs used by the resource type;
	// they must stay field-for-field and tag-for-tag identical to it.
	type (
		datasetRef = struct {
			ProjectID string `json:"projectId,omitempty"`
			DatasetID string `json:"datasetId,omitempty"`
		}
		datasetWrapper = struct {
			DatasetReference *datasetRef `json:"datasetReference,omitempty"`
		}
	)

	if in == nil {
		return
	}

	dest := in.DestinationDataset
	if strings.TrimSpace(in.DestinationDatasetID) == "" && dest != nil && dest.DatasetReference != nil {
		in.DestinationDatasetID = strings.TrimSpace(dest.DatasetReference.DatasetID)
	}
	did := strings.TrimSpace(in.DestinationDatasetID)
	if did == "" {
		return
	}

	switch {
	case dest == nil:
		in.DestinationDataset = &datasetWrapper{
			DatasetReference: &datasetRef{ProjectID: projectID, DatasetID: did},
		}
	case dest.DatasetReference == nil:
		dest.DatasetReference = &datasetRef{ProjectID: projectID, DatasetID: did}
	default:
		// Keep whatever the client supplied; only fill the blanks.
		ref := dest.DatasetReference
		if strings.TrimSpace(ref.DatasetID) == "" {
			ref.DatasetID = did
		}
		if strings.TrimSpace(ref.ProjectID) == "" {
			ref.ProjectID = projectID
		}
	}
}
// handleCreateConfigProjectScoped serves the project-scoped
// transferConfigs CREATE endpoint (no `/locations/` segment in the
// URL). It behaves like the location-scoped create, with the config
// stored under emulatorDefaultTransferLocation. A blank body is
// accepted; a body that is not valid JSON yields a 400.
func (h *Handler) handleCreateConfigProjectScoped(w http.ResponseWriter, r *http.Request) {
	raw, readErr := io.ReadAll(r.Body)
	if readErr != nil {
		writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid body")
		return
	}
	// Best effort: the body has been fully read, a close error changes nothing.
	_ = r.Body.Close()

	var req transferConfigResource
	if strings.TrimSpace(string(raw)) != "" {
		if decodeErr := json.Unmarshal(raw, &req); decodeErr != nil {
			writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid json: "+decodeErr.Error())
			return
		}
	}

	project := r.PathValue("projectId")
	normalizeTransferConfigInput(project, &req)
	h.finishCreateTransferConfig(w, project, emulatorDefaultTransferLocation, req)
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package datatransfer
import "strconv"
// pageWindow returns the [start, end) bounds of one page within a
// sorted resource list of lenNames entries.
//
// pageSizeStr is the requested page size. Anything that is missing,
// unparsable, non-positive or greater than 1000 falls back to the
// default of 100 (oversized requests are not clamped to 1000).
// pageToken is an integer-as-string offset, the same scheme the list
// handlers emit. A missing, unparsable, negative or out-of-range token
// restarts at offset 0.
func pageWindow(lenNames int, pageSizeStr, pageToken string) (start, end int) {
	const (
		defaultPageSize = 100
		maxPageSize     = 1000
	)

	size := defaultPageSize
	if parsed, err := strconv.Atoi(pageSizeStr); err == nil && parsed > 0 && parsed <= maxPageSize {
		size = parsed
	}
	if offset, err := strconv.Atoi(pageToken); err == nil && offset >= 0 && offset < lenNames {
		start = offset
	}
	end = min(start+size, lenNames)
	return start, end
}
package handlers
import (
"encoding/json"
"io"
"net/http"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// decodeDatasetBody reads the request body and decodes it as a
// bqtypes.Dataset. An empty body is accepted and yields the zero
// Dataset. On a read or parse failure it writes a 400 "invalid" error
// to w and returns false; the caller must then stop handling the
// request.
func decodeDatasetBody(w http.ResponseWriter, r *http.Request) (bqtypes.Dataset, bool) {
	var ds bqtypes.Dataset
	raw, readErr := io.ReadAll(r.Body)
	switch {
	case readErr != nil:
		writeError(w, http.StatusBadRequest, "invalid",
			"Could not read dataset request body: "+readErr.Error())
		return ds, false
	case len(raw) == 0:
		return ds, true
	}
	if parseErr := json.Unmarshal(raw, &ds); parseErr != nil {
		writeError(w, http.StatusBadRequest, "invalid",
			"Could not parse dataset request body as JSON: "+parseErr.Error())
		return ds, false
	}
	return ds, true
}
// decodeTableBody reads the request body and decodes it as a
// bqtypes.Table. An empty body is accepted and yields the zero Table.
// On a read or parse failure it writes a 400 "invalid" error to w and
// returns false; the caller must then stop handling the request.
func decodeTableBody(w http.ResponseWriter, r *http.Request) (bqtypes.Table, bool) {
	var t bqtypes.Table
	raw, readErr := io.ReadAll(r.Body)
	switch {
	case readErr != nil:
		writeError(w, http.StatusBadRequest, "invalid",
			"Could not read table request body: "+readErr.Error())
		return t, false
	case len(raw) == 0:
		return t, true
	}
	if parseErr := json.Unmarshal(raw, &t); parseErr != nil {
		writeError(w, http.StatusBadRequest, "invalid",
			"Could not parse table request body as JSON: "+parseErr.Error())
		return t, false
	}
	return t, true
}
package handlers
import (
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/external"
"github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
)
// DepsOptions carries gateway-level settings threaded into handler deps.
type DepsOptions struct {
// DataDir is copied into Dependencies.DataDir. When non-empty,
// BuildDependenciesWith also tries to load the external source
// configuration from it.
DataDir string
// DefaultDatasetID is the server-level fallback dataset for
// unqualified table names (see Dependencies.DefaultDatasetID).
DefaultDatasetID string
}
// BuildDependencies constructs the shared handler dependency bundle
// used by both the REST gateway and the public gRPC surface, with
// default (zero-value) options. When eng is nil (gateway-only mode /
// unit tests) Catalog and Query stay nil and handlers fall back to
// their NotImplemented stubs.
func BuildDependencies(eng *engine.Client) Dependencies {
	var defaults DepsOptions
	return BuildDependenciesWith(eng, defaults)
}
// BuildDependenciesWith constructs the handler dependency bundle with
// fresh in-memory stores and the given options. When opts.DataDir is
// set, the external source configuration is loaded from it; a load
// failure is ignored and leaves ExternalSources nil. Catalog and Query
// are wired only when eng is non-nil.
func BuildDependenciesWith(eng *engine.Client, opts DepsOptions) Dependencies {
	var sources *sourceconfig.Config
	if dir := opts.DataDir; dir != "" {
		// Best effort: external sources are optional, so a load error
		// simply means none are configured.
		loaded, err := sourceconfig.Load(dir)
		if err == nil {
			sources = loaded
		}
	}

	deps := Dependencies{
		Jobs:             jobs.NewRegistry(),
		Metadata:         NewMetadataStore(),
		Snapshots:        NewSnapshotStore(),
		Routines:         NewRoutineStore(),
		Models:           NewModelStore(),
		Sessions:         NewSessionStore(),
		DataDir:          opts.DataDir,
		DefaultDatasetID: opts.DefaultDatasetID,
		ExternalSources:  sources,
	}
	if eng == nil {
		return deps
	}
	deps.Catalog = eng.Catalog
	deps.Query = eng.Query
	return deps
}
// externalResolver builds the materialization resolver from the
// external source configuration carried by deps.
func externalResolver(deps Dependencies) *external.Resolver {
	sources := deps.ExternalSources
	return external.NewResolver(sources)
}
package handlers
import (
"net/http"
"sync"
)
// Discovery implements the BigQuery v2 discovery endpoint:
//
//	GET /discovery/v1/apis/bigquery/v2/rest
//
// Google API client libraries fetch a discovery document at startup to
// learn the method surface of the service they are talking to. The
// emulator serves a hand-written, minimal subset of the upstream
// discovery JSON that lists exactly the methods routed in
// [gateway.NewServer] (see docs/REST_API.md). It is deliberately small:
// just enough that a client library can find a `kind`, enumerate the
// method ids, and locate their paths/HTTP verbs.
//
// The shape follows Google's documented `discovery#restDescription`
// format. We do not claim parity with the upstream document for fields
// like `schemas`, `auth`, or `revision`; clients that depend on those
// should hit the real BigQuery discovery URL.
//
// The document is obtained once, when the handler is constructed, and
// the same value is served for every request. The Dependencies
// argument is unused.
func Discovery(_ Dependencies) http.HandlerFunc {
	description := buildDiscoveryDocument()
	serve := func(w http.ResponseWriter, _ *http.Request) {
		writeJSON(w, http.StatusOK, description)
	}
	return serve
}
// buildDiscoveryDocument constructs the minimal restDescription served
// by the emulator. It is built once and reused for every request: the
// constructor is wrapped in sync.OnceValue, so the literal below is
// evaluated on the first call and later calls return the same value.
//
// The catalog of methods here is the authoritative list mirrored in
// docs/REST_API.md and gateway/server.go. Keep all three in sync when
// adding a new endpoint: add the mux entry, add the table row, and add
// a method entry here.
var buildDiscoveryDocument = sync.OnceValue(func() discoveryDocument {
return discoveryDocument{
Kind: discoveryKind,
// Etag is left empty; its JSON tag is omitempty, so it is not emitted.
Etag: "",
DiscoveryVersion: "v1",
ID: "bigquery:v2",
Name: "bigquery",
Version: "v2",
Title: "BigQuery API (emulator)",
Description: "Local BigQuery emulator REST surface. " +
"This discovery document lists only the methods the emulator " +
"actually routes; see docs/REST_API.md for the canonical mapping.",
Protocol: "rest",
// RootURL is left empty and the base paths below are host-relative.
RootURL: "",
ServicePath: "bigquery/v2/",
BasePath: "/bigquery/v2/",
BaseURL: "/bigquery/v2/",
BatchPath: "batch/bigquery/v2",
Parameters: commonParameters(),
// Resources is keyed by REST resource name; each entry lists the
// methods the emulator exposes for that resource.
Resources: map[string]discoveryResource{
discoveryResourceProjects: {
Methods: map[string]discoveryMethod{
discoveryMethodList: {
ID: "bigquery.projects.list",
Path: "projects",
HTTPMethod: http.MethodGet,
},
"getServiceAccount": {
ID: "bigquery.projects.getServiceAccount",
Path: "projects/{projectId}/serviceAccount",
HTTPMethod: http.MethodGet,
ParameterOrder: []string{paramProjectID},
Parameters: map[string]discoveryParameter{
paramProjectID: pathString(paramProjectID),
},
},
},
},
discoveryResourceDatasets: {
Methods: map[string]discoveryMethod{
discoveryMethodList: datasetsListMethod(),
discoveryMethodInsert: datasetsInsertMethod(),
discoveryMethodGet: datasetsGetMethod(),
discoveryMethodUpdate: datasetsUpdateMethod(),
discoveryMethodPatch: datasetsPatchMethod(),
discoveryMethodDelete: datasetsDeleteMethod(),
"undelete": datasetsUndeleteMethod(),
},
},
discoveryResourceTables: {
Methods: map[string]discoveryMethod{
discoveryMethodList: tablesListMethod(),
discoveryMethodInsert: tablesInsertMethod(),
discoveryMethodGet: tablesGetMethod(),
discoveryMethodUpdate: tablesUpdateMethod(),
discoveryMethodPatch: tablesPatchMethod(),
discoveryMethodDelete: tablesDeleteMethod(),
"getIamPolicy": tablesIamMethod("getIamPolicy"),
"setIamPolicy": tablesIamMethod("setIamPolicy"),
"testIamPermissions": tablesIamMethod("testIamPermissions"),
},
},
discoveryResourceTabledata: {
Methods: map[string]discoveryMethod{
discoveryMethodList: tabledataListMethod(),
"insertAll": tabledataInsertAllMethod(),
},
},
discoveryResourceJobs: {
Methods: map[string]discoveryMethod{
discoveryMethodList: jobsListMethod(),
discoveryMethodInsert: jobsInsertMethod(),
discoveryMethodGet: jobsGetMethod(),
"cancel": jobsCancelMethod(),
discoveryMethodDelete: jobsDeleteMethod(),
discoveryMethodQuery: jobsQueryMethod(),
"getQueryResults": jobsGetQueryResultsMethod(),
},
},
discoveryResourceModels: {
Methods: map[string]discoveryMethod{
discoveryMethodList: modelsListMethod(),
discoveryMethodGet: modelsGetMethod(),
discoveryMethodPatch: modelsPatchMethod(),
discoveryMethodDelete: modelsDeleteMethod(),
},
},
discoveryResourceRoutines: {
Methods: map[string]discoveryMethod{
discoveryMethodList: routinesListMethod(),
discoveryMethodInsert: routinesInsertMethod(),
discoveryMethodGet: routinesGetMethod(),
discoveryMethodUpdate: routinesUpdateMethod(),
discoveryMethodDelete: routinesDeleteMethod(),
},
},
discoveryResourceRowPolicy: {
Methods: map[string]discoveryMethod{
discoveryMethodList: rowAccessPoliciesListMethod(),
},
},
},
}
})
// discoveryKind is the kind value Google's discovery service stamps on
// every restDescription. The verification command (`jq .kind`) asserts
// this exact string, so it must not drift.
//
// NOTE(review): presumably this is the value stored in
// discoveryDocument.Kind; the assignment is outside this view.
const discoveryKind = "discovery#restDescription"
// Discovery-document path-parameter names. The upstream BigQuery REST
// API exposes resources keyed off these {…} segments and the
// discovery JSON has to spell them out verbatim, which is why the same
// string repeats dozens of times across the method tables. Hoisted to
// consts so the JSON wire shape stays a single source of truth.
//
// Each value is the bare parameter name; the method path templates wrap
// it in braces (for example "projects/{projectId}/datasets/{datasetId}").
const (
paramProjectID = "projectId"
paramDatasetID = "datasetId"
paramTableID = "tableId"
paramJobID = "jobId"
paramModelID = "modelId"
paramRoutineID = "routineId"
)
// Discovery-document resource keys. These are the JSON-object keys
// inside the document's top-level `resources` map; client libraries
// dispatch on them to find the method tables for each REST resource.
//
// The same block also holds the standard method names used as keys of
// each resource's `methods` map, and the parameter type / location
// values written into discoveryParameter.
const (
// Keys of the top-level `resources` map.
discoveryResourceProjects = "projects"
discoveryResourceDatasets = "datasets"
discoveryResourceTables = "tables"
discoveryResourceTabledata = "tabledata"
discoveryResourceJobs = "jobs"
discoveryResourceModels = "models"
discoveryResourceRoutines = "routines"
discoveryResourceRowPolicy = "rowAccessPolicies"
// Keys of a resource's `methods` map that recur across resources.
discoveryMethodList = "list"
discoveryMethodGet = "get"
discoveryMethodInsert = "insert"
discoveryMethodUpdate = "update"
discoveryMethodPatch = "patch"
discoveryMethodDelete = "delete"
discoveryMethodQuery = "query"
// Values for discoveryParameter.Type and discoveryParameter.Location.
discoveryParamTypeString = "string"
discoveryParamLocationPath = "path"
discoveryParamLocationQuery = "query"
)
// discoveryDocument is the trimmed-down restDescription served by the
// emulator. It models only the fields client libraries actually consult
// for routing; the upstream document also contains schemas, scopes,
// auth, and feature flags which the emulator does not need.
//
// Etag, Description, BatchPath and Parameters are dropped from the JSON
// when empty (omitempty); every other field is always emitted.
type discoveryDocument struct {
Kind string `json:"kind"`
Etag string `json:"etag,omitempty"`
DiscoveryVersion string `json:"discoveryVersion"`
ID string `json:"id"`
Name string `json:"name"`
Version string `json:"version"`
Title string `json:"title"`
Description string `json:"description,omitempty"`
Protocol string `json:"protocol"`
RootURL string `json:"rootUrl"`
ServicePath string `json:"servicePath"`
BasePath string `json:"basePath"`
BaseURL string `json:"baseUrl"`
BatchPath string `json:"batchPath,omitempty"`
// Parameters holds document-level parameters keyed by name.
Parameters map[string]discoveryParameter `json:"parameters,omitempty"`
// Resources maps a resource key (e.g. "datasets") to its method table.
Resources map[string]discoveryResource `json:"resources"`
}
// discoveryResource is one entry of discoveryDocument.Resources: the
// table of methods exposed by a single REST resource.
type discoveryResource struct {
// Methods maps a method name (e.g. "list", "insertAll") to its description.
Methods map[string]discoveryMethod `json:"methods"`
}
// discoveryMethod describes a single REST method in the discovery
// document: its dotted id, path template, HTTP verb and parameters.
// Description, ParameterOrder and Parameters are omitted from the JSON
// when empty.
type discoveryMethod struct {
// ID is the dotted method id, e.g. "bigquery.datasets.list".
ID string `json:"id"`
// Path is the path template with {param} placeholders.
Path string `json:"path"`
// HTTPMethod is the HTTP verb, e.g. http.MethodGet.
HTTPMethod string `json:"httpMethod"`
Description string `json:"description,omitempty"`
// ParameterOrder lists parameter names in order.
ParameterOrder []string `json:"parameterOrder,omitempty"`
// Parameters describes each parameter, keyed by name.
Parameters map[string]discoveryParameter `json:"parameters,omitempty"`
}
// discoveryParameter describes one method- or document-level parameter.
// Required and Description are omitted from the JSON when zero/empty.
type discoveryParameter struct {
// Type is the parameter's JSON type, e.g. "string" or "boolean".
Type string `json:"type"`
// Location is where the parameter travels: "path" or "query".
Location string `json:"location"`
Required bool `json:"required,omitempty"`
Description string `json:"description,omitempty"`
}
// pathString builds the descriptor for a mandatory, string-typed path
// parameter. The supplied name is also reused as the parameter's
// description. It exists purely to keep the method tables terse.
func pathString(name string) discoveryParameter {
	var param discoveryParameter
	param.Type = discoveryParamTypeString
	param.Location = discoveryParamLocationPath
	param.Required = true
	param.Description = name
	return param
}
// commonParameters returns the Google-standard query parameters every
// method accepts. Only the handful BigQuery clients actually pass are
// declared; the full upstream list is much longer. A fresh map is
// returned on every call.
func commonParameters() map[string]discoveryParameter {
	// queryParam builds an optional query-string parameter descriptor.
	queryParam := func(typ, description string) discoveryParameter {
		return discoveryParameter{
			Type:        typ,
			Location:    discoveryParamLocationQuery,
			Description: description,
		}
	}

	params := make(map[string]discoveryParameter, 4)
	params["alt"] = queryParam(discoveryParamTypeString, "Data format for the response.")
	params["prettyPrint"] = queryParam("boolean", "Returns response with indentations and line breaks.")
	params["key"] = queryParam(discoveryParamTypeString, "API key. Ignored by the emulator.")
	params["access_token"] = queryParam(discoveryParamTypeString, "OAuth access token. Ignored by the emulator.")
	return params
}
package handlers
import "net/http"
// The dataset / table / job method definitions are factored into
// individual helpers so each method's parameter set stays grouped with
// its path and id. They are not exported because they are only used to
// populate buildDiscoveryDocument.

// datasetsListMethod describes bigquery.datasets.list: a GET on the
// project's datasets collection, keyed by projectId only.
func datasetsListMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.datasets.list"
	m.Path = "projects/{projectId}/datasets"
	m.HTTPMethod = http.MethodGet
	m.ParameterOrder = []string{paramProjectID}
	m.Parameters = map[string]discoveryParameter{
		paramProjectID: pathString(paramProjectID),
	}
	return m
}
// datasetsInsertMethod describes bigquery.datasets.insert: a POST to
// the project's datasets collection, keyed by projectId only.
func datasetsInsertMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.datasets.insert"
	m.Path = "projects/{projectId}/datasets"
	m.HTTPMethod = http.MethodPost
	m.ParameterOrder = []string{paramProjectID}
	m.Parameters = map[string]discoveryParameter{
		paramProjectID: pathString(paramProjectID),
	}
	return m
}
// datasetsGetMethod describes bigquery.datasets.get: a GET on a single
// dataset addressed by projectId and datasetId. The update, patch and
// delete descriptors are derived from this one.
func datasetsGetMethod() discoveryMethod {
	order := []string{paramProjectID, paramDatasetID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.datasets.get",
		Path:           "projects/{projectId}/datasets/{datasetId}",
		HTTPMethod:     http.MethodGet,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// datasetsUpdateMethod describes bigquery.datasets.update. It reuses
// the path and parameters of datasetsGetMethod and swaps in the PUT verb.
func datasetsUpdateMethod() discoveryMethod {
	method := datasetsGetMethod()
	method.ID, method.HTTPMethod = "bigquery.datasets.update", http.MethodPut
	return method
}
// datasetsPatchMethod describes bigquery.datasets.patch. It reuses the
// path and parameters of datasetsGetMethod and swaps in the PATCH verb.
func datasetsPatchMethod() discoveryMethod {
	method := datasetsGetMethod()
	method.ID, method.HTTPMethod = "bigquery.datasets.patch", http.MethodPatch
	return method
}
// datasetsDeleteMethod describes bigquery.datasets.delete. It reuses
// the path and parameters of datasetsGetMethod and swaps in the DELETE
// verb.
func datasetsDeleteMethod() discoveryMethod {
	method := datasetsGetMethod()
	method.ID, method.HTTPMethod = "bigquery.datasets.delete", http.MethodDelete
	return method
}
// datasetsUndeleteMethod describes bigquery.datasets.undelete: a POST
// to the ":undelete" custom method on a dataset addressed by projectId
// and datasetId.
func datasetsUndeleteMethod() discoveryMethod {
	order := []string{paramProjectID, paramDatasetID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.datasets.undelete",
		Path:           "projects/{projectId}/datasets/{datasetId}:undelete",
		HTTPMethod:     http.MethodPost,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// tableScopedParams returns a fresh map with the three required path
// parameters (projectId, datasetId, tableId) shared by every method
// that addresses a specific table.
func tableScopedParams() map[string]discoveryParameter {
	names := [...]string{paramProjectID, paramDatasetID, paramTableID}
	params := make(map[string]discoveryParameter, len(names))
	for _, name := range names {
		params[name] = pathString(name)
	}
	return params
}
// tablesListMethod describes bigquery.tables.list: a GET on a dataset's
// tables collection, keyed by projectId and datasetId. The insert
// descriptor is derived from this one.
func tablesListMethod() discoveryMethod {
	order := []string{paramProjectID, paramDatasetID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.tables.list",
		Path:           "projects/{projectId}/datasets/{datasetId}/tables",
		HTTPMethod:     http.MethodGet,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// tablesInsertMethod describes bigquery.tables.insert. It reuses the
// path and parameters of tablesListMethod and swaps in the POST verb.
func tablesInsertMethod() discoveryMethod {
	method := tablesListMethod()
	method.ID, method.HTTPMethod = "bigquery.tables.insert", http.MethodPost
	return method
}
// tablesGetMethod describes bigquery.tables.get: a GET on a single
// table addressed by projectId, datasetId and tableId. The update,
// patch and delete descriptors are derived from this one.
func tablesGetMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.tables.get"
	m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}"
	m.HTTPMethod = http.MethodGet
	m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
	m.Parameters = tableScopedParams()
	return m
}
// tablesUpdateMethod describes bigquery.tables.update. It reuses the
// path and parameters of tablesGetMethod and swaps in the PUT verb.
func tablesUpdateMethod() discoveryMethod {
	method := tablesGetMethod()
	method.ID, method.HTTPMethod = "bigquery.tables.update", http.MethodPut
	return method
}
// tablesPatchMethod describes bigquery.tables.patch. It reuses the path
// and parameters of tablesGetMethod and swaps in the PATCH verb.
func tablesPatchMethod() discoveryMethod {
	method := tablesGetMethod()
	method.ID, method.HTTPMethod = "bigquery.tables.patch", http.MethodPatch
	return method
}
// tablesDeleteMethod describes bigquery.tables.delete. It reuses the
// path and parameters of tablesGetMethod and swaps in the DELETE verb.
func tablesDeleteMethod() discoveryMethod {
	method := tablesGetMethod()
	method.ID, method.HTTPMethod = "bigquery.tables.delete", http.MethodDelete
	return method
}
// tablesIamMethod describes one of the table IAM custom methods. op is
// the custom-method name (e.g. "getIamPolicy"); it is appended to both
// the "bigquery.tables." id prefix and, after a colon, to the table
// path. Every variant is a POST scoped to a specific table.
func tablesIamMethod(op string) discoveryMethod {
	const (
		idPrefix   = "bigquery.tables."
		pathPrefix = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}:"
	)
	var m discoveryMethod
	m.ID = idPrefix + op
	m.Path = pathPrefix + op
	m.HTTPMethod = http.MethodPost
	m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
	m.Parameters = tableScopedParams()
	return m
}
// tabledataListMethod describes bigquery.tabledata.list: a GET on the
// "/data" sub-path of a specific table.
func tabledataListMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.tabledata.list"
	m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}/data"
	m.HTTPMethod = http.MethodGet
	m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
	m.Parameters = tableScopedParams()
	return m
}
// tabledataInsertAllMethod describes bigquery.tabledata.insertAll: a
// POST to the "/insertAll" sub-path of a specific table.
func tabledataInsertAllMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.tabledata.insertAll"
	m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}/insertAll"
	m.HTTPMethod = http.MethodPost
	m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
	m.Parameters = tableScopedParams()
	return m
}
// jobsListMethod describes bigquery.jobs.list: a GET on the project's
// jobs collection, keyed by projectId only. The insert descriptor is
// derived from this one.
func jobsListMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.jobs.list"
	m.Path = "projects/{projectId}/jobs"
	m.HTTPMethod = http.MethodGet
	m.ParameterOrder = []string{paramProjectID}
	m.Parameters = map[string]discoveryParameter{
		paramProjectID: pathString(paramProjectID),
	}
	return m
}
// jobsInsertMethod describes bigquery.jobs.insert. It reuses the path
// and parameters of jobsListMethod and swaps in the POST verb.
func jobsInsertMethod() discoveryMethod {
	method := jobsListMethod()
	method.ID, method.HTTPMethod = "bigquery.jobs.insert", http.MethodPost
	return method
}
// jobsGetMethod describes bigquery.jobs.get: a GET on a single job
// addressed by projectId and jobId.
func jobsGetMethod() discoveryMethod {
	order := []string{paramProjectID, paramJobID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.jobs.get",
		Path:           "projects/{projectId}/jobs/{jobId}",
		HTTPMethod:     http.MethodGet,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// jobsCancelMethod describes bigquery.jobs.cancel: a POST to the
// "/cancel" sub-path of a job addressed by projectId and jobId.
func jobsCancelMethod() discoveryMethod {
	order := []string{paramProjectID, paramJobID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.jobs.cancel",
		Path:           "projects/{projectId}/jobs/{jobId}/cancel",
		HTTPMethod:     http.MethodPost,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// jobsDeleteMethod describes bigquery.jobs.delete: a DELETE on the
// "/delete" sub-path of a job addressed by projectId and jobId.
func jobsDeleteMethod() discoveryMethod {
	order := []string{paramProjectID, paramJobID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.jobs.delete",
		Path:           "projects/{projectId}/jobs/{jobId}/delete",
		HTTPMethod:     http.MethodDelete,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// jobsQueryMethod describes bigquery.jobs.query: a POST to the
// project's "queries" collection, keyed by projectId only.
func jobsQueryMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.jobs.query"
	m.Path = "projects/{projectId}/queries"
	m.HTTPMethod = http.MethodPost
	m.ParameterOrder = []string{paramProjectID}
	m.Parameters = map[string]discoveryParameter{
		paramProjectID: pathString(paramProjectID),
	}
	return m
}
// jobsGetQueryResultsMethod describes bigquery.jobs.getQueryResults: a
// GET on "queries/{jobId}" under the project, keyed by projectId and
// jobId.
func jobsGetQueryResultsMethod() discoveryMethod {
	order := []string{paramProjectID, paramJobID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.jobs.getQueryResults",
		Path:           "projects/{projectId}/queries/{jobId}",
		HTTPMethod:     http.MethodGet,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// modelScopedParams returns a fresh map with the three required path
// parameters (projectId, datasetId, modelId) shared by every
// bigquery.models.* method that targets a specific model.
func modelScopedParams() map[string]discoveryParameter {
	names := [...]string{paramProjectID, paramDatasetID, paramModelID}
	params := make(map[string]discoveryParameter, len(names))
	for _, name := range names {
		params[name] = pathString(name)
	}
	return params
}
// modelsListMethod describes bigquery.models.list: a GET on a dataset's
// models collection, keyed by projectId and datasetId.
func modelsListMethod() discoveryMethod {
	order := []string{paramProjectID, paramDatasetID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.models.list",
		Path:           "projects/{projectId}/datasets/{datasetId}/models",
		HTTPMethod:     http.MethodGet,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// modelsGetMethod describes bigquery.models.get: a GET on a single
// model addressed by projectId, datasetId and modelId. The patch and
// delete descriptors are derived from this one.
func modelsGetMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.models.get"
	m.Path = "projects/{projectId}/datasets/{datasetId}/models/{modelId}"
	m.HTTPMethod = http.MethodGet
	m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramModelID}
	m.Parameters = modelScopedParams()
	return m
}
// modelsPatchMethod describes bigquery.models.patch. It reuses the path
// and parameters of modelsGetMethod and swaps in the PATCH verb.
func modelsPatchMethod() discoveryMethod {
	method := modelsGetMethod()
	method.ID, method.HTTPMethod = "bigquery.models.patch", http.MethodPatch
	return method
}
// modelsDeleteMethod describes bigquery.models.delete. It reuses the
// path and parameters of modelsGetMethod and swaps in the DELETE verb.
func modelsDeleteMethod() discoveryMethod {
	method := modelsGetMethod()
	method.ID, method.HTTPMethod = "bigquery.models.delete", http.MethodDelete
	return method
}
// routineScopedParams returns a fresh map with the three required path
// parameters (projectId, datasetId, routineId) shared by every
// bigquery.routines.* method that targets a specific routine.
func routineScopedParams() map[string]discoveryParameter {
	names := [...]string{paramProjectID, paramDatasetID, paramRoutineID}
	params := make(map[string]discoveryParameter, len(names))
	for _, name := range names {
		params[name] = pathString(name)
	}
	return params
}
// routinesListMethod describes bigquery.routines.list: a GET on a
// dataset's routines collection, keyed by projectId and datasetId. The
// insert descriptor is derived from this one.
func routinesListMethod() discoveryMethod {
	order := []string{paramProjectID, paramDatasetID}
	params := make(map[string]discoveryParameter, len(order))
	for _, name := range order {
		params[name] = pathString(name)
	}
	return discoveryMethod{
		ID:             "bigquery.routines.list",
		Path:           "projects/{projectId}/datasets/{datasetId}/routines",
		HTTPMethod:     http.MethodGet,
		ParameterOrder: order,
		Parameters:     params,
	}
}
// routinesInsertMethod describes bigquery.routines.insert. It reuses
// the path and parameters of routinesListMethod and swaps in the POST
// verb.
func routinesInsertMethod() discoveryMethod {
	method := routinesListMethod()
	method.ID, method.HTTPMethod = "bigquery.routines.insert", http.MethodPost
	return method
}
// routinesGetMethod describes bigquery.routines.get: a GET on a single
// routine addressed by projectId, datasetId and routineId. The update
// and delete descriptors are derived from this one.
func routinesGetMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.routines.get"
	m.Path = "projects/{projectId}/datasets/{datasetId}/routines/{routineId}"
	m.HTTPMethod = http.MethodGet
	m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramRoutineID}
	m.Parameters = routineScopedParams()
	return m
}
// routinesUpdateMethod describes bigquery.routines.update. It reuses
// the path and parameters of routinesGetMethod and swaps in the PUT verb.
func routinesUpdateMethod() discoveryMethod {
	method := routinesGetMethod()
	method.ID, method.HTTPMethod = "bigquery.routines.update", http.MethodPut
	return method
}
// routinesDeleteMethod describes bigquery.routines.delete. It reuses
// the path and parameters of routinesGetMethod and swaps in the DELETE
// verb.
func routinesDeleteMethod() discoveryMethod {
	method := routinesGetMethod()
	method.ID, method.HTTPMethod = "bigquery.routines.delete", http.MethodDelete
	return method
}
// rowAccessPoliciesListMethod describes bigquery.rowAccessPolicies.list:
// a GET on the "/rowAccessPolicies" sub-path of a specific table.
func rowAccessPoliciesListMethod() discoveryMethod {
	var m discoveryMethod
	m.ID = "bigquery.rowAccessPolicies.list"
	m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies"
	m.HTTPMethod = http.MethodGet
	m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
	m.Parameters = tableScopedParams()
	return m
}
package handlers
import (
"net/http"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// queryGRPCToHTTPError converts an error from the engine's Query gRPC
// service into a BigQuery-shaped JSON error envelope written to w. It
// reports whether a response was written, so handlers can write
// `if queryGRPCToHTTPError(w, err) { return }`. A nil err, or a status
// whose code is OK, writes nothing and returns false.
//
// The table parallels grpcToHTTPError but substitutes the
// query-specific REST reasons the BigQuery client libraries recognize:
//
//   - INVALID_ARGUMENT → 400 invalidQuery (parse / analysis errors,
//     unknown table or column references, type mismatches; see
//     `frontend/handlers/query.cc::AnalyzeStatusToGrpc` and
//     docs/REST_API.md "SQL dialect" for why analysis errors must
//     carry `reason: invalidQuery` rather than the generic `invalid`).
//   - FAILED_PRECONDITION → 400 invalidQuery (raised by the engine when
//     the catalog has not been initialized; folded into the same reason
//     as invalid SQL because nothing is actionable beyond "the emulator
//     is not ready" and the REST envelope has no dedicated code).
//   - NOT_FOUND → 404 notFound (storage-side NOT_FOUNDs such as those
//     from `DescribeTable`; analyzer-surfaced misses usually arrive as
//     INVALID_ARGUMENT instead). Keeps parity with `tables.get`.
//   - ALREADY_EXISTS → 409 duplicate (DDL/control-op conflicts such as
//     UNDROP SCHEMA after recreating the same dataset id).
//   - PERMISSION_DENIED → 403 accessDenied.
//   - UNAUTHENTICATED → 401 authError.
//   - UNIMPLEMENTED → 501 notImplemented (the gateway is talking to a
//     legacy engine build with `--googlesql=off`).
//   - UNAVAILABLE → 503 backendError.
//   - DEADLINE_EXCEEDED → 504 backendError.
//   - RESOURCE_EXHAUSTED → 429 quotaExceeded.
//
// Every other code, and any error that is not a gRPC status at all, is
// reported as 500 internalError so a misbehaving engine cannot be
// mistaken for a recoverable client-side issue. Status messages are
// passed through bqStyleMessage before being written.
func queryGRPCToHTTPError(w http.ResponseWriter, err error) bool {
	if err == nil {
		return false
	}

	st, isStatus := status.FromError(err)
	if !isStatus {
		// Plain Go error: no status code to map, surface it as a 500.
		writeError(w, http.StatusInternalServerError, reasonInternalError,
			"Engine RPC failed: "+err.Error())
		return true
	}

	var (
		httpStatus int
		reason     string
	)
	switch st.Code() {
	case codes.OK:
		return false
	case codes.InvalidArgument, codes.FailedPrecondition:
		httpStatus, reason = http.StatusBadRequest, reasonInvalidQuery
	case codes.NotFound:
		httpStatus, reason = http.StatusNotFound, reasonNotFound
	case codes.AlreadyExists:
		httpStatus, reason = http.StatusConflict, reasonDuplicate
	case codes.PermissionDenied:
		httpStatus, reason = http.StatusForbidden, reasonAccessDenied
	case codes.Unauthenticated:
		httpStatus, reason = http.StatusUnauthorized, reasonAuthError
	case codes.Unimplemented:
		httpStatus, reason = http.StatusNotImplemented, reasonNotImplemented
	case codes.Unavailable:
		httpStatus, reason = http.StatusServiceUnavailable, reasonBackendError
	case codes.DeadlineExceeded:
		httpStatus, reason = http.StatusGatewayTimeout, reasonBackendError
	case codes.ResourceExhausted:
		httpStatus, reason = http.StatusTooManyRequests, reasonQuotaExceeded
	default:
		httpStatus, reason = http.StatusInternalServerError, reasonInternalError
	}

	writeError(w, httpStatus, reason, bqStyleMessage(st.Message()))
	return true
}
package handlers
import (
"context"
"net/http"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/external"
)
// queryDefaultDatasetForExecute runs the two preparation steps that
// precede ExecuteQuery: it resolves the request's defaultDataset and
// materializes any tableDefinitions. On success it returns the dataset
// id to forward and ok=true. If preparing the external tables fails, an
// HTTP error has already been written to w and it returns ("", false).
func queryDefaultDatasetForExecute(
	deps Dependencies,
	w http.ResponseWriter,
	r *http.Request,
	projectID string,
	req *bqtypes.QueryRequest,
) (string, bool) {
	resolved := resolveDefaultDataset(deps, req.DefaultDataset)
	prepared, err := prepareQueryExternalTables(
		r.Context(), deps, projectID, req.TableDefinitions, resolved)
	if wrote := writeExternalTableError(w, err); wrote {
		return "", false
	}
	return prepared, true
}
// prepareQueryExternalTables materializes ephemeral tableDefinitions
// and returns the default dataset id to forward to the engine. With no
// table definitions, or with no catalog client configured, it is a
// no-op that hands defaultDataset back unchanged. A non-nil error means
// the caller should emit an HTTP error (jobs.query) or record a failed
// job (jobs.insert).
func prepareQueryExternalTables(
	ctx context.Context,
	deps Dependencies,
	projectID string,
	tableDefs map[string]bqtypes.ExternalDataConfiguration,
	defaultDataset string,
) (string, error) {
	if deps.Catalog != nil && len(tableDefs) > 0 {
		resolver := externalResolver(deps)
		return external.PrepareTableDefinitionsWith(
			ctx, deps.Catalog, projectID, tableDefs, defaultDataset, resolver)
	}
	return defaultDataset, nil
}
// writeExternalTableError reports a gateway-side external table failure
// on the synchronous query API as a 400 invalidQuery response. It
// returns true when err was non-nil and a response was written.
func writeExternalTableError(w http.ResponseWriter, err error) bool {
	if err == nil {
		return false
	}
	const prefix = "Could not prepare external table: "
	writeError(w, http.StatusBadRequest, reasonInvalidQuery, prefix+err.Error())
	return true
}
// insertExternalTable materializes an external table on tables.insert.
// When t.Type is empty it is defaulted to externalTableType, mutating t
// in place. It returns true on success and false when an error response
// has already been written to w.
//
// NOTE(review): deps.Catalog is passed through without a nil check
// here; presumably the caller guarantees it is set — confirm.
func insertExternalTable(
	w http.ResponseWriter,
	r *http.Request,
	deps Dependencies,
	projectID, datasetID, tableID string,
	t *bqtypes.Table,
) bool {
	if t.Type == "" {
		t.Type = externalTableType
	}

	target := external.Target{
		ProjectID: projectID,
		DatasetID: datasetID,
		TableID:   tableID,
		Schema:    t.Schema,
	}
	err := external.MaterializeWith(
		r.Context(), deps.Catalog, target, t.ExternalDataConfiguration, externalResolver(deps))
	if wrote := writeExternalTableInsertError(w, err); wrote {
		return false
	}
	return true
}
// writeExternalTableInsertError reports an external table failure on
// tables.insert as a 400 invalid response. It returns true when err was
// non-nil and a response was written.
func writeExternalTableInsertError(w http.ResponseWriter, err error) bool {
	if err == nil {
		return false
	}
	const prefix = "Could not create external table: "
	writeError(w, http.StatusBadRequest, reasonInvalid, prefix+err.Error())
	return true
}
package handlers
import (
"net/http"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// enginePolicyFederatedURL is the repository-relative documentation
// anchor cited in the message of every "unsupported federated feature"
// 501 response.
const enginePolicyFederatedURL = "docs/ENGINE_POLICY.md#external-query-and-federated-sources"
// writeUnsupportedFederatedEnvelope writes a BigQuery-shaped 501
// response for a federated / external feature the emulator does not
// support. feature is a human-readable description of the rejected
// feature and becomes the start of the error message, which also points
// the reader at enginePolicyFederatedURL.
//
// The reason comes from the shared reasonNotImplemented constant, as in
// the package's other 501 paths, rather than a repeated string literal.
func writeUnsupportedFederatedEnvelope(w http.ResponseWriter, feature string) {
	writeError(w, http.StatusNotImplemented, reasonNotImplemented,
		feature+" is not supported by the BigQuery emulator (fixture-backed EXTERNAL_QUERY only; see "+
			enginePolicyFederatedURL+").")
}
// rejectUnsupportedTablePosture rejects table resources that request a
// storage posture the emulator does not support. The checks run in
// order: a biglakeConfiguration, objectTableOptions, or an external
// data configuration whose sourceFormat is OBJECT_TABLE (compared after
// trimming whitespace and upper-casing). It returns true when a 501 was
// written to w, and false for a nil table or a supported one.
func rejectUnsupportedTablePosture(w http.ResponseWriter, t *bqtypes.Table) bool {
	if t == nil {
		return false
	}

	var feature string
	switch {
	case t.BiglakeConfiguration != nil:
		feature = "BigLake tables (biglakeConfiguration)"
	case t.ObjectTableOptions != nil:
		feature = "Object tables (objectTableOptions)"
	case t.ExternalDataConfiguration != nil &&
		strings.ToUpper(strings.TrimSpace(t.ExternalDataConfiguration.SourceFormat)) == "OBJECT_TABLE":
		feature = "Object tables (OBJECT_TABLE sourceFormat)"
	default:
		return false
	}

	writeUnsupportedFederatedEnvelope(w, feature)
	return true
}
// rejectUnsupportedDatasetPosture rejects dataset resources that carry
// an externalDatasetReference. It returns true when a 501 was written
// to w, and false for a nil dataset or one without such a reference.
func rejectUnsupportedDatasetPosture(w http.ResponseWriter, ds *bqtypes.Dataset) bool {
	if ds != nil && ds.ExternalDatasetReference != nil {
		const feature = "External datasets (Spanner / Cloud SQL externalDatasetReference)"
		writeUnsupportedFederatedEnvelope(w, feature)
		return true
	}
	return false
}
// Package handlers contains HTTP handlers for the BigQuery REST surface.
//
// At this stage of the project most handlers are intentional stubs that
// return http.StatusNotImplemented. They exist so that:
//
// - The route table in gateway/server.go is exhaustive and easy to scan,
// which doubles as a checklist for the gateway-HTTP-surface section of
// ROADMAP.md.
// - Client libraries get a structurally-valid BigQuery error envelope
// instead of a 404 when they hit something we have not implemented yet.
// - Each handler can be flipped to a real implementation in isolation.
package handlers
import (
"encoding/json"
"net/http"
"os"
"regexp"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/models"
"github.com/vantaboard/bigquery-emulator/gateway/routines"
"github.com/vantaboard/bigquery-emulator/gateway/session"
"github.com/vantaboard/bigquery-emulator/gateway/snapshots"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// Dependencies bundles everything a handler might need to reach (engine
// gRPC client, in-memory catalog, logger, etc.). It grows as the gateway
// wires in real backends.
//
// Catalog and Query are the engine-side gRPC clients defined in
// proto/emulator.proto; both are nil when the gateway is started with
// --engine_binary="" (gateway-only / unit-test mode) and handlers
// must nil-check before dispatching to them.
//
// The pointer-typed stores and configs below are each documented as
// tolerating nil; see the individual field comments for the fallback
// that applies.
type Dependencies struct {
// Catalog is the gRPC client used by datasets/tables/tabledata
// handlers to mirror catalog mutations into the engine.
Catalog enginepb.CatalogClient
// Query is the gRPC client used by jobs.query and the query branch
// of jobs.insert to forward SQL execution to the engine.
Query enginepb.QueryClient
// Jobs is the in-memory job registry the synchronous jobs.query
// handler records DONE jobs in, and that future jobs.get /
// jobs.list handlers will read back from. When nil (legacy unit
// tests that predate the registry), QueryRun lazily mints a
// per-handler fallback so behavior stays compatible.
Jobs *jobs.Registry
// Metadata caches REST-only Dataset/Table fields the engine
// does not yet persist (labels, defaultCollation, expirationTime,
// rangePartitioning, clustering, ...). Insert/Patch/Update
// populate it; Get reads it back and merges with the engine
// response. Nil is treated as a no-op store so legacy unit
// tests that do not opt in keep their echo posture.
Metadata *MetadataStore
// Snapshots retains deleted-table row captures for COPY jobs that
// reference table@epoch decorators (undelete samples). Nil is a
// no-op store.
Snapshots *snapshots.Store
// Routines is the in-memory UDF / TVF / procedure registry REST
// handlers use for routines.* and DDL query jobs register into.
// Nil is treated as a per-handler fallback store.
Routines *routines.Store
// Models is the in-memory BQML metadata registry REST handlers use
// for models.* and CREATE MODEL DDL query jobs register into.
// Nil is treated as a per-handler fallback store.
Models *models.Store
// Sessions is the in-memory BigQuery session registry used when
// queries request createSession or pass connectionProperties
// session_id. Nil is treated as a per-handler fallback store.
Sessions *session.Store
// DataDir is the engine persistent storage root (--data_dir). Used
// to resolve external source fixture/local/live modes.
DataDir string
// DefaultDatasetID is the server-level fallback dataset used to
// resolve unqualified (single-segment) table names when a query or
// job does not carry its own `defaultDataset`. Mirrors setting
// `default_dataset` on a production BigQuery client/job. Empty
// means no fallback, so bare table names error exactly like
// production BigQuery with no default dataset configured.
DefaultDatasetID string
// ExternalSources configures per-source fixture|local|live resolution.
// Nil uses package defaults (GCS local, Sheets fixture).
ExternalSources *sourceconfig.Config
}
// NewRoutineStore returns an empty routine registry for gateway deps.
// It delegates to routines.NewStore; the result has the type of
// Dependencies.Routines.
func NewRoutineStore() *routines.Store {
return routines.NewStore()
}
// NewModelStore returns an empty model metadata registry for gateway
// deps. It delegates to models.NewStore; the result has the type of
// Dependencies.Models.
func NewModelStore() *models.Store {
return models.NewStore()
}
// NewSessionStore returns an empty session registry for gateway deps.
// It delegates to session.NewStore; the result has the type of
// Dependencies.Sessions.
func NewSessionStore() *session.Store {
return session.NewStore()
}
// NewSnapshotStore returns an empty table snapshot store for gateway
// deps. It delegates to snapshots.NewStore; the result has the type of
// Dependencies.Snapshots.
func NewSnapshotStore() *snapshots.Store {
return snapshots.NewStore()
}
// Health is a trivial liveness endpoint useful for `docker-compose`
// health checks and CI smoke tests. It always answers 200 with a small
// fixed JSON body and never inspects the request.
func Health(w http.ResponseWriter, r *http.Request) {
	payload := make(map[string]string, 2)
	payload["status"] = "ok"
	payload["service"] = "bigquery-emulator"
	writeJSON(w, http.StatusOK, payload)
}
// NotImplemented answers with a BigQuery-shaped 501 error envelope. It
// backs routes that are registered but not yet implemented, and never
// inspects the request.
func NotImplemented(w http.ResponseWriter, r *http.Request) {
	const message = "This BigQuery emulator route is registered but not yet implemented. " +
		"See ROADMAP.md."
	writeError(w, http.StatusNotImplemented, reasonNotImplemented, message)
}
// NotFound is the catch-all handler for paths not in the route table.
// It answers with a BigQuery-shaped 404 that echoes the request method
// and URL path, so client libraries see a structured error.
func NotFound(w http.ResponseWriter, r *http.Request) {
	message := "No route matches " + r.Method + " " + r.URL.Path + "."
	writeError(w, http.StatusNotFound, reasonNotFound, message)
}
// splitColonOp splits an AIP-136 custom-method path segment of the form
// "{resource}:{op}" into its resource and op halves, cutting at the
// first colon. If there is no colon the op is returned empty and the
// whole input is the resource. This is how the emulator dispatches
// BigQuery REST custom methods like `datasets/{datasetId}:undelete` and
// `tables/{tableId}:getIamPolicy`, because Go's net/http mux cannot
// match a literal segment after a wildcard.
//
// Examples:
//
//	"ds:undelete" -> ("ds", "undelete")
//	"ds"          -> ("ds", "")
//	"a:b:c"       -> ("a", "b:c")
func splitColonOp(segment string) (resource, op string) {
	// strings.Cut has exactly the first-separator semantics needed here:
	// when the separator is absent it returns (segment, "", false).
	resource, op, _ = strings.Cut(segment, ":")
	return resource, op
}
// errorEnvelope matches the shape BigQuery returns for non-2xx responses.
// See https://cloud.google.com/bigquery/docs/reference/rest -> error format.
// It is the top-level JSON object: a single "error" key wrapping errorBody.
type errorEnvelope struct {
Error errorBody `json:"error"`
}
// errorBody is the payload under the envelope's "error" key. writeError
// fills Code with the HTTP status, Message with the error text, Status
// with the reason string, and Errors with a single detail entry. Errors
// and Status are omitted from the JSON when empty.
type errorBody struct {
Code int `json:"code"`
Message string `json:"message"`
Errors []errorDetail `json:"errors,omitempty"`
Status string `json:"status,omitempty"`
}
// errorDetail is one entry of errorBody.Errors: a reason code with its
// message and, when non-empty, a domain (writeError always sets
// "global").
type errorDetail struct {
Reason string `json:"reason"`
Message string `json:"message"`
Domain string `json:"domain,omitempty"`
}
// writeJSON sends body as a JSON response with the given HTTP status.
// It sets the Content-Type header to UTF-8 JSON, writes the status
// line, then streams the encoded body to w.
func writeJSON(w http.ResponseWriter, status int, body any) {
	headers := w.Header()
	headers.Set("Content-Type", "application/json; charset=utf-8")
	w.WriteHeader(status)

	// The encode error is deliberately dropped: the status has already
	// been written via WriteHeader, so it can no longer be changed.
	encoder := json.NewEncoder(w)
	_ = encoder.Encode(body)
}
// writeLegacySQLError reports a gateway/query legacy-SQL translation
// failure as a 400 invalidQuery response whose message is err's text.
// It returns true when err was non-nil and a response was written.
func writeLegacySQLError(w http.ResponseWriter, err error) bool {
	if err == nil {
		return false
	}
	message := err.Error()
	writeError(w, http.StatusBadRequest, reasonInvalidQuery, message)
	return true
}
func writeError(w http.ResponseWriter, status int, reason, msg string) {
writeJSON(w, status, errorEnvelope{
Error: errorBody{
Code: status,
Message: msg,
Status: reason,
Errors: []errorDetail{{
Reason: reason,
Message: msg,
Domain: "global",
}},
},
})
}
// grpcToHTTPError converts a gRPC error returned by the engine into the
// BigQuery-shaped JSON error envelope and writes it to w. It reports
// whether a response was written, so handlers can write
// `if grpcToHTTPError(w, err) { return }`. A nil err, or a status whose
// code is OK, writes nothing and returns false.
//
// The table mirrors the Storage→gRPC mapping in
// frontend/handlers/catalog.cc:
//
//   - NOT_FOUND → 404 notFound
//   - ALREADY_EXISTS → 409 duplicate
//   - INVALID_ARGUMENT → 400 invalid
//   - FAILED_PRECONDITION → 400 failedPrecondition
//   - PERMISSION_DENIED → 403 accessDenied
//   - UNAUTHENTICATED → 401 authError
//   - UNIMPLEMENTED → 501 notImplemented
//   - UNAVAILABLE → 503 backendError
//   - DEADLINE_EXCEEDED → 504 backendError
//   - RESOURCE_EXHAUSTED → 429 quotaExceeded
//
// Every other code, and any error that is not a gRPC status at all, is
// reported as 500 internalError so a misbehaving engine cannot be
// mistaken for a 404 on the wire.
//
// The status message is rewritten into BigQuery's canonical shape via
// bqStyleMessage so client-side assertions like
// `expect(err.message).to.include('Not found')` and
// `expect(err.message).to.include('Already Exists')` match the live
// surface.
func grpcToHTTPError(w http.ResponseWriter, err error) bool {
	if err == nil {
		return false
	}

	st, isStatus := status.FromError(err)
	if !isStatus {
		// Plain Go error: no status code to map, surface it as a 500.
		writeError(w, http.StatusInternalServerError, reasonInternalError,
			"Engine RPC failed: "+err.Error())
		return true
	}

	var (
		httpStatus int
		reason     string
	)
	switch st.Code() {
	case codes.OK:
		return false
	case codes.NotFound:
		httpStatus, reason = http.StatusNotFound, reasonNotFound
	case codes.AlreadyExists:
		httpStatus, reason = http.StatusConflict, reasonDuplicate
	case codes.InvalidArgument:
		httpStatus, reason = http.StatusBadRequest, reasonInvalid
	case codes.FailedPrecondition:
		httpStatus, reason = http.StatusBadRequest, reasonFailedPrecondition
	case codes.PermissionDenied:
		httpStatus, reason = http.StatusForbidden, reasonAccessDenied
	case codes.Unauthenticated:
		// The emulator never authenticates so this is unlikely, but
		// map it so a buggy engine doesn't crash through to 500.
		httpStatus, reason = http.StatusUnauthorized, reasonAuthError
	case codes.Unimplemented:
		httpStatus, reason = http.StatusNotImplemented, reasonNotImplemented
	case codes.Unavailable:
		httpStatus, reason = http.StatusServiceUnavailable, reasonBackendError
	case codes.DeadlineExceeded:
		httpStatus, reason = http.StatusGatewayTimeout, reasonBackendError
	case codes.ResourceExhausted:
		httpStatus, reason = http.StatusTooManyRequests, reasonQuotaExceeded
	default:
		httpStatus, reason = http.StatusInternalServerError, reasonInternalError
	}

	writeError(w, httpStatus, reason, bqStyleMessage(st.Message()))
	return true
}
// bqInvalidTimestampStringMsg is BigQuery's wire message for rejected
// TIMESTAMP parameter / wire-string values (see params_timestamp_reject).
// bqStyleMessage substitutes it for engine messages that begin with one
// of the "semantic: invalid TIMESTAMP ..." prefixes.
const bqInvalidTimestampStringMsg = "Invalid timestamp string"
// notFoundResourceRE / alreadyExistsResourceRE match the engine's
// canonical storage-layer error strings produced by DuckDBStorage
// (see backend/storage/duckdb/duckdb_storage.cc): "<noun> not found:
// <project>.<dataset>[.<table>]" and "<noun> already exists:
// <project>.<dataset>[.<table>]" where <noun> is "table" or
// "dataset". The resource path uses `.` between every segment on the
// engine side; BigQuery REST uses `:` between project and dataset and
// `.` between dataset and table. The captured suffix is rewritten to
// the REST shape and the noun is capitalised so client assertions for
// "Not found" / "Already Exists" prefixes (live BigQuery's canonical
// shape) match.
//
// Capture groups, in both expressions: 1 = noun, 2 = project,
// 3 = dataset, 4 = table (empty when the optional third segment is
// absent). Segments may not themselves contain a `.`.
var (
notFoundResourceRE = regexp.MustCompile(
`^(table|dataset) not found: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
alreadyExistsResourceRE = regexp.MustCompile(
`^(table|dataset) already exists: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
)
// bqStyleMessage converts the handful of engine-side messages the
// gateway knows about into BigQuery's canonical wire wording:
//
//	"table not found: dev.foo.bar"      -> "Not found: Table dev:foo.bar"
//	"dataset not found: dev.foo"        -> "Not found: Dataset dev:foo"
//	"table already exists: dev.foo.bar" -> "Already Exists: Table dev:foo.bar"
//	"dataset already exists: dev.foo"   -> "Already Exists: Dataset dev:foo"
//
// Messages beginning with one of the two "semantic: invalid TIMESTAMP"
// prefixes collapse to bqInvalidTimestampStringMsg. Everything else is
// returned unchanged so non-storage errors keep the engine's wording.
// The storage patterns are anchored with `^...$`, so a storage phrase
// embedded inside a longer message is not rewritten.
func bqStyleMessage(msg string) string {
	// Try the "not found" shape first, then "already exists".
	match, verb := notFoundResourceRE.FindStringSubmatch(msg), "Not found"
	if match == nil {
		match, verb = alreadyExistsResourceRE.FindStringSubmatch(msg), "Already Exists"
	}
	if match != nil {
		return bqStyleResourceMessage(verb, match[1], match[2], match[3], match[4])
	}

	timestampPrefixes := [...]string{
		"semantic: invalid TIMESTAMP parameter value ",
		"semantic: invalid TIMESTAMP value ",
	}
	for _, prefix := range timestampPrefixes {
		if strings.HasPrefix(msg, prefix) {
			return bqInvalidTimestampStringMsg
		}
	}
	return msg
}
// bqStyleResourceMessage renders
// "<verb>: <Noun> <project>:<dataset>[.<table>]". The ".<table>" part is
// omitted when table is empty (the dataset-only form). The nouns
// "table" and "dataset" are capitalised; any other noun is emitted as
// given, so a noun added to the regexes without an arm here still
// produces a rewritten message.
func bqStyleResourceMessage(verb, noun, project, dataset, table string) string {
	label := noun
	switch noun {
	case "table":
		label = "Table"
	case "dataset":
		label = "Dataset"
	}

	path := project + ":" + dataset
	if table != "" {
		path = path + "." + table
	}
	return verb + ": " + label + " " + path
}
// requestEmulatorBaseURL returns the absolute emulator REST origin for
// resumable upload Location headers (scheme + host, no trailing slash).
//
// Resolution order:
//
//  1. BIGQUERY_EMULATOR_HOST, when set to a non-blank value. Trailing
//     slashes are removed and "http://" is prepended when the value
//     carries no scheme (a leading "//" is dropped first).
//  2. The request itself: the scheme is "https" when r.TLS is set and
//     "http" otherwise, the host is r.Host. The first entry of
//     X-Forwarded-Proto / X-Forwarded-Host overrides the respective
//     part when that entry is non-empty.
//
// It returns "" when the environment variable is unset and either r is
// nil or no host can be determined.
func requestEmulatorBaseURL(r *http.Request) string {
	if host := strings.TrimSpace(os.Getenv("BIGQUERY_EMULATOR_HOST")); host != "" {
		host = strings.TrimRight(host, "/")
		if !strings.Contains(host, "://") {
			host = "http://" + strings.TrimPrefix(host, "//")
		}
		return host
	}
	if r == nil {
		return ""
	}

	// firstEntry returns the trimmed first element of a comma-separated
	// forwarding header, or "" when the header is absent or that
	// element is blank.
	firstEntry := func(header string) string {
		first, _, _ := strings.Cut(r.Header.Get(header), ",")
		return strings.TrimSpace(first)
	}

	scheme := "http"
	if r.TLS != nil {
		scheme = "https"
	}
	// A blank first entry (e.g. ", https") must not override the
	// detected scheme; doing so used to yield the malformed "://host".
	if proto := firstEntry("X-Forwarded-Proto"); proto != "" {
		scheme = proto
	}

	host := r.Host
	// Likewise a blank first X-Forwarded-Host entry keeps r.Host rather
	// than discarding a usable host.
	if fwdHost := firstEntry("X-Forwarded-Host"); fwdHost != "" {
		host = fwdHost
	}
	if host == "" {
		return ""
	}
	return scheme + "://" + host
}
package handlers
import (
"encoding/json"
"io"
"net/http"
"strconv"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/load"
"github.com/vantaboard/bigquery-emulator/gateway/middleware"
"github.com/vantaboard/bigquery-emulator/gateway/query"
)
// jobListKind is the value the BigQuery REST API returns for the
// `kind` field of a JobList response. See
// docs/bigquery/docs/reference/rest/v2/jobs/list.md.
const jobListKind = "bigquery#jobList"
// jobCancelKind is the value of `kind` on a JobCancelResponse, the
// envelope `jobs.cancel` returns. The body wraps the updated Job.
// See docs/bigquery/docs/reference/rest/v2/jobs/cancel.md.
const jobCancelKind = "bigquery#jobCancelResponse"
// The jobConfigurationKind* constants are the values of
// `configuration.jobType`, one per job flavor (query, load, copy,
// extract). The wire schema spells the type discriminator in
// upper-case (QUERY / LOAD / COPY / EXTRACT); we round-trip it as the
// caller posts it but stamp it explicitly when the caller leaves it
// empty so a subsequent `jobs.get` doesn't lose the discriminator.
const (
jobConfigurationKindQuery = "QUERY"
jobConfigurationKindLoad = "LOAD"
jobConfigurationKindCopy = "COPY"
jobConfigurationKindExtract = "EXTRACT"
)
// queryParamTrue is the wire literal BigQuery's REST surface uses for
// boolean query parameters (e.g. `allUsers=true`, `deleteContents=true`).
// Promoted to a constant so the goconst lint counter does not flag
// the repeated literal across handlers. Comparisons against it are
// exact string matches.
const queryParamTrue = "true"
// JobList serves `bigquery.jobs.list`:
//
//	GET /bigquery/v2/projects/{projectId}/jobs
//
// The documented query parameters `maxResults`, `minCreationTime`,
// `maxCreationTime`, `pageToken`, `stateFilter`, and `parentJobId` are
// forwarded to `Registry.ListByProject` through `jobs.ListOptions`.
// `allUsers=true` is answered with a 501 because the emulator has no
// auth context to scope cross-user listings to.
//
// Entries are the registry's full Job values rather than upstream's
// trimmed "minimal" projection; the emulator's per-job payload is
// already small, so trimming has no behavioral upside today.
//
// A nil deps.Jobs is replaced with a fresh registry when the handler
// is constructed.
func JobList(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	return func(w http.ResponseWriter, r *http.Request) {
		params := r.URL.Query()
		if params.Get("allUsers") == queryParamTrue {
			writeError(w, http.StatusNotImplemented, reasonNotImplemented,
				"jobs.list with allUsers=true is not supported; "+
					"the emulator has no auth context to scope cross-user "+
					"listings.")
			return
		}

		var opts jobs.ListOptions
		opts.MaxResults = clampToInt(parseUintQuery(params, "maxResults", 0))
		opts.PageToken = params.Get("pageToken")
		opts.ParentJobID = params.Get("parentJobId")
		opts.MinCreationTime = clampToInt64(parseUintQuery(params, "minCreationTime", 0))
		opts.MaxCreationTime = clampToInt64(parseUintQuery(params, "maxCreationTime", 0))
		// stateFilter may repeat, so take every value rather than the first.
		opts.StateFilter = params["stateFilter"]

		page, next := deps.Jobs.ListByProject(r.PathValue("projectId"), opts)
		payload := map[string]any{
			resourceKeyKind: jobListKind,
			"jobs":          page,
		}
		// nextPageToken is only present when there is another page.
		if next != "" {
			payload["nextPageToken"] = next
		}
		writeJSON(w, http.StatusOK, payload)
	}
}
// JobInsert serves `bigquery.jobs.insert` (metadata-only variant):
//
//	POST /bigquery/v2/projects/{projectId}/jobs
//
// The request body is a Job resource whose `configuration` carries one
// of `query`, `load`, `copy`, or `extract`. The handler decodes the
// body and dispatches on whichever of those is set, checked in that
// order:
//
//   - query   -> runSyncQueryInsert (501 via NotImplemented when
//     deps.Query is nil, e.g. unit-mode runs without an engine)
//   - load    -> runSyncLoadInsert
//   - copy    -> runSyncCopyInsert
//   - extract -> runSyncExtractInsert
//
// For query jobs, engine-side analysis errors (table not found, syntax
// error, ...) are recorded on the Job's `Status.ErrorResult` rather
// than surfaced as a 4xx, mirroring BigQuery's contract that
// `jobs.insert` succeeds at the API level and reports per-query
// failures through the Job status.
//
// Error responses produced here:
//
//   - 400 when the body cannot be read or is not valid JSON
//   - 400 when `configuration` is missing (an empty body included)
//   - 501 when `configuration` holds none of the four job flavors
//
// A nil deps.Jobs is replaced with a fresh registry when the handler
// is constructed.
func JobInsert(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	return func(w http.ResponseWriter, r *http.Request) {
		raw, readErr := io.ReadAll(r.Body)
		if readErr != nil {
			writeError(w, http.StatusBadRequest, reasonInvalid,
				"Could not read job request body: "+readErr.Error())
			return
		}

		// An empty body skips decoding and is rejected below because
		// the zero Job has no configuration.
		var posted jobs.Job
		if len(raw) != 0 {
			if parseErr := json.Unmarshal(raw, &posted); parseErr != nil {
				writeError(w, http.StatusBadRequest, reasonInvalid,
					"Could not parse job request body as JSON: "+parseErr.Error())
				return
			}
		}

		cfg := posted.Configuration
		if cfg == nil {
			writeError(w, http.StatusBadRequest, reasonInvalid,
				"Job configuration is required.")
			return
		}

		if cfg.Query != nil {
			if deps.Query == nil {
				NotImplemented(w, r)
				return
			}
			runSyncQueryInsert(deps, w, r, &posted, cfg)
			return
		}
		if cfg.Load != nil {
			runSyncLoadInsert(deps, w, r, &posted, cfg)
			return
		}
		if cfg.Copy != nil {
			runSyncCopyInsert(deps, w, r, &posted, cfg)
			return
		}
		if cfg.Extract != nil {
			runSyncExtractInsert(deps, w, r, &posted, cfg)
			return
		}
		writeError(w, http.StatusNotImplemented, reasonNotImplemented,
			"jobs.insert: configuration must include query, load, copy, or extract.")
	}
}
// JobInsertUpload serves `bigquery.jobs.insert` (media-upload variant):
//
//	POST /upload/bigquery/v2/projects/{projectId}/jobs
//	PUT  /upload/bigquery/v2/projects/{projectId}/jobs
//
// Callers select it via `?uploadType=multipart` or
// `?uploadType=resumable`; both are accepted because the official
// client libraries pick one based on payload size. POST requests go to
// handleJobInsertUploadPost, PUT requests to handleJobInsertUploadPut,
// and any other method is answered with 405.
//
// The upload store (load.DefaultUploadStore) is obtained once, when the
// handler is constructed, and shared by every request. A nil deps.Jobs
// is replaced with a fresh registry at the same time.
func JobInsertUpload(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	uploads := load.DefaultUploadStore()
	return func(w http.ResponseWriter, r *http.Request) {
		if r.Method == http.MethodPost {
			handleJobInsertUploadPost(deps, uploads, w, r)
			return
		}
		if r.Method == http.MethodPut {
			handleJobInsertUploadPut(deps, uploads, w, r)
			return
		}
		writeError(w, http.StatusMethodNotAllowed, reasonInvalid,
			"jobs.insert upload supports POST and PUT only")
	}
}
// JobGet serves `bigquery.jobs.get`:
//
//	GET /bigquery/v2/projects/{projectId}/jobs/{jobId}
//
// The job is looked up in `deps.Jobs` by jobId and returned verbatim.
// A BigQuery-shaped 404 is returned when:
//
//   - no job with that id is registered,
//   - the stored jobReference belongs to a different project than the
//     URL path, or
//   - the `location` query parameter is set, the stored job has a
//     location, and the two differ (the upstream API hides
//     cross-region jobs behind the same envelope).
//
// A nil deps.Jobs is replaced with a fresh registry when the handler
// is constructed.
func JobGet(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")

		job, found := deps.Jobs.Get(jobID)
		if !found || job.JobReference.ProjectID != projectID {
			writeJobNotFound(w, projectID, jobID, "")
			return
		}

		// Location only disqualifies the job when both sides state one.
		wanted := r.URL.Query().Get("location")
		stored := job.JobReference.Location
		if wanted != "" && stored != "" && wanted != stored {
			writeJobNotFound(w, projectID, jobID, wanted)
			return
		}
		writeJSON(w, http.StatusOK, job)
	}
}
// JobCancel serves `bigquery.jobs.cancel`:
//
//	POST /bigquery/v2/projects/{projectId}/jobs/{jobId}/cancel
//
// On success it returns a `JobCancelResponse` (kind + job) per the
// upstream wire shape, wrapping whatever `Registry.Cancel` returns.
// Per the registry's contract, non-terminal jobs flip to DONE with
// CancelRequested=true while terminal jobs only get the flag stamped;
// the upstream API is documented as idempotent.
//
// A 404 is returned when the job is unknown, belongs to another
// project, or the registry reports it missing at cancel time.
//
// A nil deps.Jobs is replaced with a fresh registry when the handler
// is constructed.
func JobCancel(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")

		existing, found := deps.Jobs.Get(jobID)
		if !found || existing.JobReference.ProjectID != projectID {
			writeJobNotFound(w, projectID, jobID, "")
			return
		}

		cancelled, found := deps.Jobs.Cancel(jobID)
		if !found {
			writeJobNotFound(w, projectID, jobID, "")
			return
		}

		envelope := map[string]any{
			resourceKeyKind: jobCancelKind,
			"job":           cancelled,
		}
		writeJSON(w, http.StatusOK, envelope)
	}
}
// JobDelete serves `bigquery.jobs.delete`:
//
//	DELETE /bigquery/v2/projects/{projectId}/jobs/{jobId}/delete
//
// The literal `/delete` suffix is the upstream URL template, not a
// typo (see docs/bigquery/docs/reference/rest/v2/jobs/delete.md).
// The job's metadata is removed through `Registry.Delete`; per the
// upstream contract, deleting a script parent also drops its child job
// metadata. The response is HTTP 204 on success and a BigQuery-shaped
// 404 when the job is unknown, belongs to another project, or the
// registry reports nothing was deleted.
//
// A nil deps.Jobs is replaced with a fresh registry when the handler
// is constructed.
func JobDelete(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")

		job, known := deps.Jobs.Get(jobID)
		owned := known && job.JobReference.ProjectID == projectID
		// Delete is attempted only for a job that belongs to this project.
		if !owned || !deps.Jobs.Delete(jobID) {
			writeJobNotFound(w, projectID, jobID, "")
			return
		}
		w.WriteHeader(http.StatusNoContent)
	}
}
// clampToInt narrows a uint64 wire value to Go's platform-native int,
// saturating at the largest int instead of wrapping. BigQuery REST
// transmits maxResults / page sizes as decimal strings that are parsed
// as uint64; clamping here keeps the gosec G115 narrowing guard out of
// every individual handler.
func clampToInt(v uint64) int {
	if v <= uint64(maxInt) {
		return int(v)
	}
	return maxInt
}

// clampToInt64 narrows a uint64 to int64, saturating at the largest
// int64. Creation timestamps are documented as ms-since-epoch and stay
// far below 2^63 in practice; the explicit guard keeps the gosec G115
// lint clean.
func clampToInt64(v uint64) int64 {
	if v <= uint64(int64Max) {
		return int64(v)
	}
	return int64Max
}

// maxInt and int64Max are the largest values of int and int64, derived
// by shifting the all-ones unsigned pattern right by one bit. They are
// spelled out here (instead of importing `math`) to keep the import
// surface minimal for the clamp helpers above.
const (
	maxInt   = int(^uint(0) >> 1)
	int64Max = int64(^uint64(0) >> 1)
)
// millisString renders t in BigQuery's wire timestamp format: the
// number of milliseconds since the Unix epoch, as a base-10 string.
// Handlers use it for per-call timestamps such as the job statistics'
// `startTime` / `endTime`.
func millisString(t time.Time) string {
	epochMillis := t.UnixMilli()
	return strconv.FormatInt(epochMillis, 10)
}
// writeJobNotFound writes the BigQuery-shaped 404 envelope that
// `jobs.get`, `jobs.cancel`, `jobs.delete`, and `jobs.getQueryResults`
// return for an unknown job. The message is
// "Not found: Job <project>:<job>"; a non-empty location appends
// " in location <loc>" so callers can tell a wrong-region lookup from
// a truly missing entry.
func writeJobNotFound(w http.ResponseWriter, projectID, jobID, location string) {
	locationSuffix := ""
	if location != "" {
		locationSuffix = " in location " + location
	}
	writeError(w, http.StatusNotFound, reasonNotFound,
		"Not found: Job "+projectID+":"+jobID+locationSuffix)
}
// runSyncQueryInsert is the sync slice of `JobInsert`'s implementation.
// Pulled out of the handler closure so the inbound-body validation +
// auth gating stays a thin top-level switch (cyclop / funlen caps).
//
// The flow mirrors `runQueryExecute` (the `jobs.query` handler's
// engine call) so analysis / streaming errors funnel through the
// same gRPC-to-HTTP mapping. The single difference is that
// `JobInsert` always returns a Job on success, never the bare
// `QueryResponse` payload `jobs.query` emits — the upstream API
// surfaces row data only on the sync `jobs.query` and follow-up
// `jobs.getQueryResults` calls.
//
// Every path in this function answers with HTTP 200 and the Job as
// the body: failures from preparation, the engine call, the stream,
// or result appending are recorded on the Job through
// finalizeFailedJob rather than turned into an HTTP error.
//
// Steps, in order:
//
//  1. Dry-run requests and multi-statement scripts are delegated to
//     their own helpers and return immediately.
//  2. A PENDING job is registered and stamped with the caller's email.
//  3. External table definitions are prepared; this may replace the
//     effective default dataset.
//  4. An abort-session statement finishes the job without contacting
//     the engine.
//  5. Query parameters are expanded, the SQL is prepared for the
//     engine, and the query is executed and drained.
//  6. Results are appended, destination / routine / model / view
//     metadata is persisted, and the job is finalized as DONE.
//
//nolint:funlen // mirrors runQueryExecute; abort-session + external-table branches add statements
func runSyncQueryInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
// Dry runs never execute; they have a dedicated path.
if cfg.DryRun {
runSyncQueryDryRunInsert(deps, w, r, posted, cfg)
return
}
// Multi-statement scripts are handled by the script runner.
if isMultiStatementScript(cfg.Query.Query) {
runSyncScriptQueryInsert(deps, w, r, posted, cfg)
return
}
projectID := r.PathValue("projectId")
job := newPendingJob(deps, projectID, posted, cfg)
job.UserEmail = principalEmailFromContext(r)
// UseLegacySQL is optional on the wire; absent means false here.
useLegacy := false
if cfg.Query.UseLegacySQL != nil {
useLegacy = *cfg.Query.UseLegacySQL
}
defaultDataset := resolveDefaultDataset(deps, cfg.Query.DefaultDataset)
// prepareQueryExternalTables returns the default dataset to use from
// here on, which replaces the one resolved above.
defaultDataset, extErr := prepareQueryExternalTables(
r.Context(), deps, projectID, cfg.Query.TableDefinitions, defaultDataset)
if extErr != nil {
start := time.Now().UTC()
finalizeFailedJob(deps, job, start, extErr)
writeJSON(w, http.StatusOK, job)
return
}
// Abort-session statements complete immediately with start == end
// and no schema, rows, or statistics; the engine is not called.
if parseAbortSessionSQL(cfg.Query.Query) {
start := time.Now().UTC()
end := start
sessionInfo := sessionStore(&deps).Resolve(
projectID, posted.JobReference.Location, false, cfg.Query.ConnectionProperties)
finalizeDoneJob(deps, job, start, end, nil, nil, nil, "", "", nil, nil, sessionInfo, r)
writeJSON(w, http.StatusOK, job)
return
}
// Parameters are first expanded into the SQL text; bindParams is the
// parameter set that is still sent to the engine afterwards.
sql := expandQueryParamsInSQL(cfg.Query.Query, cfg.Query.QueryParameters)
bindParams := stripExpandedArrayParams(cfg.Query.Query, sql, cfg.Query.QueryParameters)
sql, sqlErr := query.PrepareEngineSQL(useLegacy, sql, projectID, defaultDataset)
if sqlErr != nil {
start := time.Now().UTC()
finalizeFailedJob(deps, job, start, sqlErr)
writeJSON(w, http.StatusOK, job)
return
}
// Second preparation pass, which has access to the catalog and the
// job registry.
sql, sqlErr = query.PrepareEngineSQLForJobs(r.Context(), deps.Catalog, deps.Jobs, projectID, sql)
if sqlErr != nil {
start := time.Now().UTC()
finalizeFailedJob(deps, job, start, sqlErr)
writeJSON(w, http.StatusOK, job)
return
}
// UseLegacySql is always sent as false: the legacy flag was already
// handed to query.PrepareEngineSQL above.
engineReq := &enginepb.QueryRequest{
ProjectId: projectID,
DefaultDatasetId: defaultDataset,
Sql: sql,
UseLegacySql: false,
Parameters: parametersToEngineMap(bindParams),
PrincipalEmail: principalEmailFromContext(r),
}
// The job's start time is taken immediately before the engine call.
start := time.Now().UTC()
stream, err := deps.Query.ExecuteQuery(r.Context(), engineReq)
if err != nil {
finalizeFailedJob(deps, job, start, err)
writeJSON(w, http.StatusOK, job)
return
}
// Drain the whole stream into memory before building the response.
schema, dmlStats, rows, statementType, emulatorRoute, emulatorPhases, streamErr := drainSyncStream(stream)
if streamErr != nil {
finalizeFailedJob(deps, job, start, streamErr)
writeJSON(w, http.StatusOK, job)
return
}
restSchema := schemaFromProto(schema)
if err := query.AppendResults(r.Context(), deps.Catalog, cfg.Query, projectID, restSchema, rows); err != nil {
finalizeFailedJob(deps, job, start, err)
writeJSON(w, http.StatusOK, job)
return
}
query.PersistDestinationMetadata(deps.Metadata, cfg.Query, projectID)
// Routine DDL: persist the routine and remember it as the DDL target
// reported in the job statistics.
var ddlTarget *bqtypes.RoutineReference
if statementType == "CREATE_FUNCTION" || statementType == "CREATE_PROCEDURE" ||
statementType == "CREATE_TABLE_FUNCTION" {
ddlTarget = persistRoutineFromDDL(
r.Context(), &deps, projectID, defaultDataset, cfg.Query.Query)
}
// CREATE MODEL is detected from the SQL text rather than from the
// engine-reported statement type.
if isCreateModelSQL(cfg.Query.Query) {
persistModelFromDDL(r.Context(), &deps, projectID, defaultDataset, cfg.Query.Query)
}
handleViewDDLAfterQuery(&deps, projectID, defaultDataset, cfg.Query.Query, statementType)
// When the caller named no destination table and a SELECT (or a
// statement with no reported type) produced rows, materialize an
// implicit destination. A failure here is ignored: the job then
// simply carries no destination table.
if cfg.Query.DestinationTable == nil && deps.Catalog != nil && len(rows) > 0 &&
(statementType == "" || statementType == statementTypeSelect) {
if dest, err := query.MaterializeImplicitDestination(
r.Context(), deps.Catalog, projectID, defaultDataset,
job.JobReference.JobID, restSchema, rows); err == nil {
cfg.Query.DestinationTable = dest
job.Configuration.Query.DestinationTable = dest
}
}
// The end time is taken after all post-processing, so it covers the
// metadata persistence above as well as the engine call.
end := time.Now().UTC()
sessionInfo := sessionStore(&deps).Resolve(
projectID, posted.JobReference.Location,
queryJobCreateSession(cfg), queryJobConnectionProperties(cfg))
finalizeDoneJob(
deps,
job,
start,
end,
schema,
dmlStats,
rows,
statementType,
emulatorRoute,
emulatorPhases,
ddlTarget,
sessionInfo,
r,
)
writeJSON(w, http.StatusOK, job)
}
// runSyncQueryDryRunInsert serves jobs.insert requests whose
// configuration.dryRun is set. The SQL is prepared and forwarded to
// enginepb.Query.DryRun; on success the job is marked DONE and
// jobs.ApplyDryRunStatistics stamps the engine's estimated bytes
// processed, mirroring a jobs.query dry run.
//
// Every outcome is answered with HTTP 200 and the Job as the body;
// failures are recorded on the Job through finalizeFailedJob.
func runSyncQueryDryRunInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
	posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
	projectID := r.PathValue("projectId")
	job := newPendingJob(deps, projectID, posted, cfg)

	// fail records cause on the job and sends the job back to the caller.
	fail := func(startedAt time.Time, cause error) {
		finalizeFailedJob(deps, job, startedAt, cause)
		writeJSON(w, http.StatusOK, job)
	}

	// UseLegacySQL is optional on the wire; absent means false.
	legacySQL := false
	if flag := cfg.Query.UseLegacySQL; flag != nil {
		legacySQL = *flag
	}

	requestedDataset := resolveDefaultDataset(deps, cfg.Query.DefaultDataset)
	dataset, extErr := prepareQueryExternalTables(
		r.Context(), deps, projectID, cfg.Query.TableDefinitions, requestedDataset)
	if extErr != nil {
		fail(time.Now().UTC(), extErr)
		return
	}

	engineSQL, sqlErr := query.PrepareEngineSQL(legacySQL, cfg.Query.Query, projectID, dataset)
	if sqlErr != nil {
		fail(time.Now().UTC(), sqlErr)
		return
	}

	// UseLegacySql is always sent as false; the legacy flag was already
	// passed to query.PrepareEngineSQL.
	engineReq := &enginepb.QueryRequest{
		ProjectId:        projectID,
		DefaultDatasetId: dataset,
		Sql:              engineSQL,
		UseLegacySql:     false,
		Parameters:       parametersToEngineMap(cfg.Query.QueryParameters),
		PrincipalEmail:   principalEmailFromContext(r),
	}

	// Timestamps bracket the engine call itself.
	start := time.Now().UTC()
	resp, err := deps.Query.DryRun(r.Context(), engineReq)
	end := time.Now().UTC()
	if err != nil {
		fail(start, err)
		return
	}

	job.Status.State = jobs.JobStateDone
	jobs.ApplyDryRunStatistics(job, resp.GetEstimatedBytesProcessed(), start, end)
	writeJSON(w, http.StatusOK, job)
}
// newPendingJob registers a PENDING job built from the inbound
// `jobs.insert` body and returns the handle the rest of the flow
// stamps results onto.
//
//   - The project always comes from projectID (the URL path is
//     authoritative); the body's `jobReference.projectId` is ignored.
//   - A caller-supplied jobId is kept verbatim; otherwise the registry
//     mints one.
//   - When cfg.JobType is empty it is filled in from whichever of
//     load / copy / extract is set (checked in that order), falling
//     back to QUERY. Note that this writes to the caller's cfg.
func newPendingJob(deps Dependencies, projectID string, posted *jobs.Job, cfg *jobs.JobConfiguration) *jobs.Job {
	ref := bqtypes.JobReference{
		ProjectID: projectID,
		JobID:     posted.JobReference.JobID,
		Location:  posted.JobReference.Location,
	}
	if ref.JobID == "" {
		ref.JobID = deps.Jobs.NewJobID()
	}

	if cfg.JobType == "" {
		switch {
		case cfg.Load != nil:
			cfg.JobType = jobConfigurationKindLoad
		case cfg.Copy != nil:
			cfg.JobType = jobConfigurationKindCopy
		case cfg.Extract != nil:
			cfg.JobType = jobConfigurationKindExtract
		default:
			cfg.JobType = jobConfigurationKindQuery
		}
	}

	pending := &jobs.Job{
		Kind:          jobs.JobKind,
		ID:            ref.ProjectID + ":" + ref.JobID,
		JobReference:  ref,
		Status:        jobs.Status{State: jobs.JobStatePending},
		Statistics:    jobs.Statistics{CreationTime: nowMillis()},
		Configuration: cfg,
	}
	deps.Jobs.Register(pending)
	return pending
}
// finalizeFailedJob marks a query job DONE with an errorResult built
// from err, using reason "invalidQuery"-class (reasonInvalidQuery), and
// records the start / end timestamps.
//
// Analyzer messages are kept in their raw position-tagged shape
// (e.g. "Unrecognized name: x [at 1:8]") because BigQuery's REST
// surface reports them that way and upstream samples assert on the
// row / column markers; only the messages bqStyleMessage recognises
// are rewritten.
//
// `Status.Errors` is deliberately left nil. The upstream `jobs.insert`
// contract returns the job synchronously with a status envelope the
// caller inspects later, and the official BigQuery Node client wraps
// any non-nil `status.errors` array into an `ApiError` immediately
// (see `@google-cloud/bigquery/src/bigquery.ts` -> createJob). That
// would turn "engine reports analysis failure" into a thrown
// exception instead of a Job-with-error the caller can inspect.
// `errorResult` is the right field for the single terminal error.
func finalizeFailedJob(_ Dependencies, job *jobs.Job, start time.Time, err error) {
	finalizeFailedJobWithReason(job, start, err, reasonInvalidQuery)
}

// finalizeFailedDataPlaneJob is the load / copy / extract counterpart
// of finalizeFailedJob. It records the failure with reasonInvalid so
// Node / Python clients surface the parser or fetch message instead of
// a generic transport error.
func finalizeFailedDataPlaneJob(job *jobs.Job, start time.Time, err error) {
	finalizeFailedJobWithReason(job, start, err, reasonInvalid)
}

// finalizeFailedJobWithReason is the shared implementation: it sets
// the job to DONE, attaches an ErrorProto carrying reason and the
// bqStyleMessage-rewritten error text, and stamps StartTime from start
// and EndTime from the current time. err must be non-nil.
func finalizeFailedJobWithReason(job *jobs.Job, start time.Time, err error, reason string) {
	finishedAt := time.Now().UTC()

	status := &job.Status
	status.State = jobs.JobStateDone
	status.ErrorResult = &bqtypes.ErrorProto{
		Reason:  reason,
		Message: bqStyleMessage(err.Error()),
	}

	stats := &job.Statistics
	stats.StartTime = millisString(start)
	stats.EndTime = millisString(finishedAt)
}
// finalizeDoneJob marks a PENDING job as successfully DONE and caches
// the streamed result on the job so a follow-up `jobs.getQueryResults`
// can replay the same schema + rows without re-executing.
//
// It stamps start / end times, reports totalBytesProcessed as "0",
// and applies session info and the query destination. The emulator
// debug fields (route and phase timings) are only exposed when the
// request context is loopback, mirroring what `QueryRun` does; for
// other callers they are left empty both in the statistics and in the
// cached result. `Statistics.Query` is only populated when there is a
// statement type, a visible debug field, or a DDL target routine to
// report.
func finalizeDoneJob(_ Dependencies, job *jobs.Job, start, end time.Time,
	schema *enginepb.TableSchema, dmlStats *enginepb.DmlStats, rows []bqtypes.Row,
	statementType, emulatorRoute string, emulatorPhases map[string]int64,
	ddlTarget *bqtypes.RoutineReference,
	sessionInfo *bqtypes.SessionInfo, r *http.Request,
) {
	job.Status.State = jobs.JobStateDone

	stats := &job.Statistics
	stats.StartTime = millisString(start)
	stats.EndTime = millisString(end)
	stats.TotalBytesProcessed = "0"

	stampJobSessionInfo(job, sessionInfo)
	stampQueryJobDestination(job.JobReference.ProjectID, job, statementType)

	wireSchema := schemaFromProto(schema)
	wireDmlStats := dmlStatsFromProto(dmlStats)

	// Debug fields stay zero-valued unless the caller is loopback.
	var (
		route  string
		phases map[string]int64
	)
	if middleware.IsLoopback(r.Context()) {
		route, phases = emulatorRoute, emulatorPhases
	}

	hasQueryStats := statementType != "" || route != "" || len(phases) > 0 || ddlTarget != nil
	if hasQueryStats {
		stats.Query = &bqtypes.JobStatistics2{
			StatementType:    statementType,
			EmulatorRoute:    route,
			EmulatorPhases:   phases,
			DdlTargetRoutine: ddlTarget,
		}
	}

	job.Result = &jobs.QueryResult{
		Schema:           wireSchema,
		Rows:             rows,
		DmlStats:         wireDmlStats,
		StatementType:    statementType,
		EmulatorRoute:    route,
		EmulatorPhases:   phases,
		DdlTargetRoutine: ddlTarget,
	}
}
package handlers
import (
"errors"
"io"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// drainSyncStream is the JobInsert flavor of `streamQueryResults`:
// same proto contract, different error reporting. Instead of writing
// an HTTP envelope it returns the raw stream error so the caller can
// fold it into the Job's status.
//
// The stream is read until io.EOF. Each message is classified by the
// first of these that it carries: schema, DML stats, statement type,
// emulator route, non-empty phase timings; a message carrying none of
// them is treated as a row of cells. For schema, DML stats, statement
// type, and route only the first value seen is kept. Phase timings
// from several messages are merged by phase name (later values
// overwrite earlier ones; unnamed phases are skipped). Rows are
// converted against whatever schema has been received so far.
//
// Returns, in order: schema, DML stats, rows (non-nil even when
// empty), statement type, emulator route, phase timings in
// microseconds, and an error. On a non-EOF receive error every other
// result is its zero value.
func drainSyncStream(stream enginepb.Query_ExecuteQueryClient) (
	*enginepb.TableSchema, *enginepb.DmlStats, []bqtypes.Row, string, string, map[string]int64, error,
) {
	var (
		schema    *enginepb.TableSchema
		dmlStats  *enginepb.DmlStats
		stmtType  string
		route     string
		phaseByID map[string]int64
	)
	rows := []bqtypes.Row{}

	for {
		msg, recvErr := stream.Recv()
		if errors.Is(recvErr, io.EOF) {
			return schema, dmlStats, rows, stmtType, route, phaseByID, nil
		}
		if recvErr != nil {
			return nil, nil, nil, "", "", nil, recvErr
		}

		switch {
		case msg.GetSchema() != nil:
			if schema == nil {
				schema = msg.GetSchema()
			}
		case msg.GetDmlStats() != nil:
			if dmlStats == nil {
				dmlStats = msg.GetDmlStats()
			}
		case msg.GetStatementType() != "":
			if stmtType == "" {
				stmtType = msg.GetStatementType()
			}
		case msg.GetEmulatorRoute() != "":
			if route == "" {
				route = msg.GetEmulatorRoute()
			}
		case msg.GetPhaseTimings() != nil && len(msg.GetPhaseTimings().GetPhases()) > 0:
			timings := msg.GetPhaseTimings().GetPhases()
			if phaseByID == nil {
				phaseByID = make(map[string]int64, len(timings))
			}
			for _, timing := range timings {
				if name := timing.GetName(); name != "" {
					phaseByID[name] = timing.GetDurationUs()
				}
			}
		default:
			rows = append(rows, bqtypes.CellsToRowForSchema(msg.GetCells(), schema))
		}
	}
}
// defaultDatasetID returns the dataset ID of an optional
// `defaultDataset` reference, or "" when the reference is absent. The
// engine's wire field carries only the dataset ID; the project travels
// separately in `project_id`.
func defaultDatasetID(ref *bqtypes.DatasetReference) string {
	if ref != nil {
		return ref.DatasetID
	}
	return ""
}
// resolveDefaultDataset chooses the effective default dataset for a
// query or job. The request's own `defaultDataset` takes precedence;
// when it is missing or has an empty dataset ID the server-level
// default (`--dataset`, deps.DefaultDatasetID) is used. This mirrors a
// production BigQuery client that sets `default_dataset` once and
// omits it on individual requests. An empty result means there is no
// default, so bare table names error like they do in production.
func resolveDefaultDataset(deps Dependencies, ref *bqtypes.DatasetReference) string {
	requested := defaultDatasetID(ref)
	if requested == "" {
		return deps.DefaultDatasetID
	}
	return requested
}
package handlers
import (
"encoding/json"
"io"
"net/http"
"strconv"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/copy"
"github.com/vantaboard/bigquery-emulator/gateway/extract"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/load"
)
// runSyncLoadInsert runs a load job synchronously. It registers a
// PENDING job and then takes one of three paths:
//
//   - no catalog configured: the job is finalized through
//     finalizeDeferredDataPlaneJob with the "load" label;
//   - load.Execute fails: the error is recorded on the job;
//   - load.Execute succeeds: destination metadata is persisted and the
//     load statistics are stamped.
//
// In every case the response is HTTP 200 with the Job as the body.
func runSyncLoadInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
	posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
	projectID := r.PathValue("projectId")
	job := newPendingJob(deps, projectID, posted, cfg)
	start := time.Now().UTC()

	if deps.Catalog == nil {
		finalizeDeferredDataPlaneJob(job, cfg, start, "load")
	} else if result, err := load.Execute(r.Context(), deps.Catalog, cfg.Load, projectID); err != nil {
		finalizeFailedDataPlaneJob(job, start, err)
	} else {
		persistLoadTableMetadata(deps, cfg.Load, projectID)
		finalizeSuccessfulLoadJob(job, start, result)
	}
	writeJSON(w, http.StatusOK, job)
}
// runSyncCopyInsert runs a copy job synchronously via copy.Execute,
// which is given the catalog, the query client, and the snapshot
// store. It registers a PENDING job and then takes one of three paths:
//
//   - no catalog configured: the job is finalized through
//     finalizeDeferredDataPlaneJob with the "copy" label;
//   - copy.Execute fails: the error is recorded on the job;
//   - copy.Execute succeeds: destination metadata is persisted and the
//     copy statistics are stamped.
//
// In every case the response is HTTP 200 with the Job as the body.
func runSyncCopyInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
	posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
	projectID := r.PathValue("projectId")
	job := newPendingJob(deps, projectID, posted, cfg)
	start := time.Now().UTC()

	if deps.Catalog == nil {
		finalizeDeferredDataPlaneJob(job, cfg, start, "copy")
	} else if result, err := copy.Execute(
		r.Context(), deps.Catalog, deps.Query, deps.Snapshots, cfg.Copy, projectID); err != nil {
		finalizeFailedDataPlaneJob(job, start, err)
	} else {
		persistCopyTableMetadata(deps, cfg.Copy, projectID)
		finalizeSuccessfulCopyJob(job, start, result)
	}
	writeJSON(w, http.StatusOK, job)
}
// persistCopyTableMetadata records REST-only destination metadata
// supplied by a copy job (snapshot typing, expiration) so tables.get
// round-trips it.
//
// Nothing is stored when there is no metadata store, no copy
// configuration, or no destination table, nor when the operation is a
// plain copy without an expiration time. Otherwise the patch carries
// the trimmed expiration (when present) and, for snapshot / restore
// operations, the corresponding table type. An empty destination
// project falls back to projectID.
func persistCopyTableMetadata(deps Dependencies, cfg *jobs.JobConfigurationCopy, projectID string) {
	if deps.Metadata == nil || cfg == nil || cfg.DestinationTable == nil {
		return
	}
	dest := cfg.DestinationTable

	operation := copy.NormalizeOperationType(cfg.OperationType)
	expiry := strings.TrimSpace(cfg.DestinationExpirationTime)
	// A plain copy with no expiration has nothing worth recording.
	if expiry == "" && operation == copy.OperationCopy {
		return
	}

	var patch bqtypes.Table
	if expiry != "" {
		patch.ExpirationTime = bqtypes.MillisTimestamp(expiry)
	}
	if operation == copy.OperationSnapshot {
		patch.Type = snapshotTableType
	} else if operation == copy.OperationRestore {
		patch.Type = defaultTableType
	}

	owner := dest.ProjectID
	if owner == "" {
		owner = projectID
	}
	deps.Metadata.MergeTable(owner, dest.DatasetID, dest.TableID, patch)
}
// runSyncExtractInsert services a synchronous extract-job insert. It mints
// the pending job, runs extract.Execute against the catalog, and always
// replies with HTTP 200 plus the finalized job resource. When no catalog is
// wired in, the job is finalized through finalizeDeferredDataPlaneJob.
func runSyncExtractInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
	posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
	projectID := r.PathValue("projectId")
	job := newPendingJob(deps, projectID, posted, cfg)
	start := time.Now().UTC()

	if deps.Catalog == nil {
		finalizeDeferredDataPlaneJob(job, cfg, start, "extract")
	} else if result, err := extract.Execute(r.Context(), deps.Catalog, cfg.Extract, projectID); err != nil {
		finalizeFailedDataPlaneJob(job, start, err)
	} else {
		finalizeSuccessfulExtractJob(job, start, result)
	}

	// Every outcome is reported with the same status code and the job body.
	writeJSON(w, http.StatusOK, job)
}
// finalizeSuccessfulCopyJob marks job as DONE without an error, stamps the
// start/end times (end is taken on entry, in UTC), and attaches the copy
// statistics formatted from result.
func finalizeSuccessfulCopyJob(job *jobs.Job, start time.Time, result copy.Result) {
	finishedAt := time.Now().UTC()

	job.Status.State = jobs.JobStateDone
	job.Status.ErrorResult = nil

	job.Statistics.StartTime = millisString(start)
	job.Statistics.EndTime = millisString(finishedAt)
	job.Statistics.Copy = copy.FormatStatistics(result)
}
// finalizeSuccessfulExtractJob marks job as DONE without an error, stamps
// the start/end times (end is taken on entry, in UTC), and attaches the
// extract statistics formatted from result.
func finalizeSuccessfulExtractJob(job *jobs.Job, start time.Time, result extract.Result) {
	finishedAt := time.Now().UTC()

	job.Status.State = jobs.JobStateDone
	job.Status.ErrorResult = nil

	job.Statistics.StartTime = millisString(start)
	job.Statistics.EndTime = millisString(finishedAt)
	job.Statistics.Extract = extract.FormatStatistics(result)
}
// finalizeSuccessfulLoadJob marks job as DONE without an error, stamps the
// start/end times (end is taken on entry, in UTC), and attaches the load
// statistics formatted from result.
func finalizeSuccessfulLoadJob(job *jobs.Job, start time.Time, result load.Result) {
	finishedAt := time.Now().UTC()

	job.Status.State = jobs.JobStateDone
	job.Status.ErrorResult = nil

	job.Statistics.StartTime = millisString(start)
	job.Statistics.EndTime = millisString(finishedAt)
	job.Statistics.Load = load.FormatStatistics(result)
}
// persistLoadTableMetadata records the REST-only destination metadata a
// load job supplied (encryption configuration, clustering, time
// partitioning) in the metadata store.
//
// It is a no-op when there is no metadata store, no load configuration, no
// destination table, or none of the three fields is set. An empty
// destination project falls back to projectID.
func persistLoadTableMetadata(deps Dependencies, cfg *jobs.JobConfigurationLoad, projectID string) {
	if deps.Metadata == nil || cfg == nil || cfg.DestinationTable == nil {
		return
	}
	hasOverlay := cfg.DestinationEncryptionConfiguration != nil ||
		cfg.Clustering != nil ||
		cfg.TimePartitioning != nil
	if !hasOverlay {
		return
	}

	dest := cfg.DestinationTable
	owner := dest.ProjectID
	if owner == "" {
		owner = projectID
	}

	var overlay bqtypes.Table
	overlay.EncryptionConfiguration = cfg.DestinationEncryptionConfiguration
	overlay.Clustering = cfg.Clustering
	overlay.TimePartitioning = cfg.TimePartitioning
	deps.Metadata.MergeTable(owner, dest.DatasetID, dest.TableID, overlay)
}
// finalizeDeferredDataPlaneJob finalizes a load / copy / extract job that
// could not run because no catalog is available. The job is marked DONE
// with a not-implemented error result, start/end times are stamped, and
// zeroed statistics matching kind ("load", "copy" or "extract") are
// attached. Any other kind gets no kind-specific statistics.
func finalizeDeferredDataPlaneJob(job *jobs.Job, cfg *jobs.JobConfiguration, start time.Time, kind string) {
	finishedAt := time.Now().UTC()

	job.Status.State = jobs.JobStateDone
	job.Status.ErrorResult = &bqtypes.ErrorProto{
		Reason: reasonNotImplemented,
		Message: "jobs.insert: " + kind + " job data plane is unavailable; " +
			"load / copy / extract execution requires an engine catalog connection.",
	}
	job.Statistics.StartTime = millisString(start)
	job.Statistics.EndTime = millisString(finishedAt)

	switch kind {
	case "load":
		// Report how many source URIs were requested even though none ran.
		fileCount := 0
		if cfg.Load != nil {
			fileCount = len(cfg.Load.SourceURIs)
		}
		job.Statistics.Load = &jobs.LoadStatistics{
			InputFiles:     strconv.Itoa(fileCount),
			InputFileBytes: "0",
			OutputRows:     "0",
			OutputBytes:    "0",
			BadRecords:     "0",
		}
	case "copy":
		job.Statistics.Copy = &jobs.CopyStatistics{
			CopiedRows:         "0",
			CopiedLogicalBytes: "0",
		}
	case "extract":
		// One "0" per destination URI; stays nil when there are none.
		var fileCounts []string
		if ex := cfg.Extract; ex != nil && len(ex.DestinationURIs) > 0 {
			fileCounts = make([]string, 0, len(ex.DestinationURIs))
			for range ex.DestinationURIs {
				fileCounts = append(fileCounts, "0")
			}
		}
		job.Statistics.Extract = &jobs.ExtractStatistics{
			DestinationURIFileCounts: fileCounts,
			InputBytes:               "0",
		}
	}
}
// handleJobInsertUploadPost routes a POST upload request by its
// `uploadType` query parameter: "multipart" runs the load immediately,
// "resumable" opens an upload session, and anything else is rejected with
// HTTP 400.
func handleJobInsertUploadPost(deps Dependencies, store *load.UploadStore,
	w http.ResponseWriter, r *http.Request,
) {
	kind := r.URL.Query().Get("uploadType")
	if kind == "multipart" {
		handleMultipartLoadUpload(deps, w, r)
		return
	}
	if kind == "resumable" {
		handleResumableLoadUploadInit(store, w, r)
		return
	}
	writeError(w, http.StatusBadRequest, reasonInvalid,
		"uploadType must be multipart or resumable")
}
// handleMultipartLoadUpload reads the whole request body, splits it into
// job metadata and media via load.ParseMultipartJob (using the request's
// Content-Type), and runs the load job. Read or parse failures are
// answered with HTTP 400.
func handleMultipartLoadUpload(deps Dependencies, w http.ResponseWriter, r *http.Request) {
	payload, readErr := io.ReadAll(r.Body)
	if readErr != nil {
		writeError(w, http.StatusBadRequest, reasonInvalid,
			"Could not read upload body: "+readErr.Error())
		return
	}

	contentType := r.Header.Get("Content-Type")
	jobMeta, media, parseErr := load.ParseMultipartJob(payload, contentType)
	if parseErr != nil {
		writeError(w, http.StatusBadRequest, reasonInvalid, parseErr.Error())
		return
	}

	runUploadedLoadJob(deps, w, r, jobMeta, media)
}
// handleResumableLoadUploadInit opens a resumable upload session. The
// request body is kept as the session's job metadata and the optional
// X-Upload-Content-Length header supplies the expected media size (-1 when
// the header is absent). The session URL is returned in the Location
// header with HTTP 200; an unreadable body or a non-integer length header
// yields HTTP 400.
func handleResumableLoadUploadInit(store *load.UploadStore, w http.ResponseWriter, r *http.Request) {
	projectID := r.PathValue("projectId")

	jobMeta, readErr := io.ReadAll(r.Body)
	if readErr != nil {
		writeError(w, http.StatusBadRequest, reasonInvalid,
			"Could not read upload metadata: "+readErr.Error())
		return
	}

	expected := int64(-1)
	if header := strings.TrimSpace(r.Header.Get("X-Upload-Content-Length")); header != "" {
		parsed, parseErr := strconv.ParseInt(header, 10, 64)
		if parseErr != nil {
			writeError(w, http.StatusBadRequest, reasonInvalid,
				"invalid X-Upload-Content-Length")
			return
		}
		expected = parsed
	}

	sessionID := store.CreateSession(projectID, jobMeta, expected)
	location := load.AbsoluteSessionLocation(requestEmulatorBaseURL(r), projectID, sessionID)
	w.Header().Set("Location", location)
	w.WriteHeader(http.StatusOK)
}
// handleJobInsertUploadPut accepts one chunk of a resumable upload. It
// requires uploadType=resumable and an upload_id naming a live session,
// feeds the chunk (and its Content-Range header) to finalizeResumableChunk,
// and then either reports the upload as still incomplete or deletes the
// session and runs the load job with the accumulated media.
func handleJobInsertUploadPut(deps Dependencies, store *load.UploadStore,
	w http.ResponseWriter, r *http.Request,
) {
	query := r.URL.Query()
	if query.Get("uploadType") != "resumable" {
		writeError(w, http.StatusBadRequest, reasonInvalid,
			"PUT upload requires uploadType=resumable")
		return
	}
	uploadID := query.Get("upload_id")
	if uploadID == "" {
		writeError(w, http.StatusBadRequest, reasonInvalid, "upload_id is required")
		return
	}
	session := store.Get(uploadID)
	if session == nil {
		writeError(w, http.StatusNotFound, reasonNotFound, "upload session not found")
		return
	}

	chunk, readErr := io.ReadAll(r.Body)
	if readErr != nil {
		writeError(w, http.StatusBadRequest, reasonInvalid,
			"Could not read upload chunk: "+readErr.Error())
		return
	}

	media, complete, err := finalizeResumableChunk(
		store, uploadID, session, r.Header.Get("Content-Range"), chunk)
	if err != nil {
		writeError(w, http.StatusBadRequest, reasonInvalid, err.Error())
		return
	}
	if complete {
		// The session is finished: drop it before running the job.
		store.Delete(uploadID)
		runUploadedLoadJob(deps, w, r, session.Metadata, media)
		return
	}
	load.WriteResumeIncomplete(w, store.ReceivedBytes(uploadID))
}
// finalizeResumableChunk applies one PUT chunk to an upload session and
// reports whether the upload is complete.
//
//   - Content-Range present, empty body: nothing is appended and the upload
//     is reported as not done.
//   - Content-Range present, non-empty body: delegated to
//     appendResumableRange.
//   - No Content-Range, non-empty body: the body is appended at offset 0
//     and the upload is treated as complete.
//   - No Content-Range, empty body: complete unless the session declares a
//     positive total that has not been received yet.
//
// When done is true, media holds the session's accumulated data. An error
// is returned when appending fails or when the session has disappeared
// from the store by the time its data is read back.
func finalizeResumableChunk(store *load.UploadStore, uploadID string, sess *load.UploadSession,
	contentRange string, body []byte,
) (media []byte, done bool, err error) {
	switch {
	case contentRange != "" && len(body) == 0:
		return nil, false, nil
	case contentRange != "":
		return appendResumableRange(store, uploadID, contentRange, body)
	case len(body) > 0:
		if aerr := store.AppendBytes(uploadID, body, 0); aerr != nil {
			return nil, false, aerr
		}
		// store.Get returns nil for an unknown ID; the session can vanish
		// between the append and this lookup (e.g. a concurrent request
		// completed and deleted it), so guard the dereference.
		current := store.Get(uploadID)
		if current == nil {
			return nil, false, errUpload("upload session not found")
		}
		return current.Data, true, nil
	default:
		received := store.ReceivedBytes(uploadID)
		if sess.Total > 0 && received < sess.Total {
			return nil, false, nil
		}
		return sess.Data, true, nil
	}
}
// appendResumableRange appends a chunk described by a Content-Range header
// to the upload session and reports whether the upload is complete.
//
// It returns errInvalidContentRange when the header cannot be parsed and
// errContentRangeLength when the body length does not equal
// end-start+1. The upload is considered incomplete while the range
// declares a positive total that has not been received yet; otherwise the
// session's accumulated data is returned with done set to true. An error
// is returned when the session has disappeared from the store by the time
// its data is read back.
func appendResumableRange(store *load.UploadStore, uploadID, contentRange string, body []byte,
) ([]byte, bool, error) {
	start, end, total, ok := load.ParseContentRange(contentRange)
	if !ok {
		return nil, false, errInvalidContentRange
	}
	if int64(len(body)) != end-start+1 {
		return nil, false, errContentRangeLength
	}
	if err := store.AppendBytes(uploadID, body, start); err != nil {
		return nil, false, err
	}
	if received := store.ReceivedBytes(uploadID); total > 0 && received < total {
		return nil, false, nil
	}
	// store.Get returns nil for an unknown ID; the session can vanish
	// between the append and this lookup (e.g. a concurrent request
	// completed and deleted it), so guard the dereference.
	sess := store.Get(uploadID)
	if sess == nil {
		return nil, false, errUpload("upload session not found")
	}
	return sess.Data, true, nil
}
// errUpload is a string-backed error type for upload protocol failures.
type errUpload string

// Error returns the message text, satisfying the error interface.
func (e errUpload) Error() string {
	return string(e)
}

// Sentinel errors reported while validating a resumable chunk's
// Content-Range header.
var (
	errInvalidContentRange = errUpload("invalid Content-Range")
	errContentRangeLength  = errUpload("Content-Range length mismatch")
)
// runUploadedLoadJob runs a load job whose job resource arrived as upload
// metadata and whose data arrived as media bytes.
//
// Metadata that is not valid JSON, or that lacks configuration.load, is
// rejected with HTTP 400. Otherwise the job is always answered with HTTP
// 200 and the finalized job resource: deferred when no catalog is wired
// in, failed when load.ExecuteFromBytes errors, and successful (with
// destination metadata persisted) otherwise.
func runUploadedLoadJob(deps Dependencies, w http.ResponseWriter, r *http.Request,
	metadata, media []byte,
) {
	var posted jobs.Job
	if len(metadata) != 0 {
		if decodeErr := json.Unmarshal(metadata, &posted); decodeErr != nil {
			writeError(w, http.StatusBadRequest, reasonInvalid,
				"Could not parse upload metadata as JSON: "+decodeErr.Error())
			return
		}
	}
	cfg := posted.Configuration
	if cfg == nil || cfg.Load == nil {
		writeError(w, http.StatusBadRequest, reasonInvalid,
			"upload metadata must include configuration.load")
		return
	}

	projectID := r.PathValue("projectId")
	job := newPendingJob(deps, projectID, &posted, cfg)
	start := time.Now().UTC()

	if deps.Catalog == nil {
		finalizeDeferredDataPlaneJob(job, cfg, start, "load")
	} else if result, err := load.ExecuteFromBytes(r.Context(), deps.Catalog, cfg.Load, projectID, media); err != nil {
		finalizeFailedDataPlaneJob(job, start, err)
	} else {
		persistLoadTableMetadata(deps, cfg.Load, projectID)
		finalizeSuccessfulLoadJob(job, start, result)
	}
	writeJSON(w, http.StatusOK, job)
}
package handlers
import (
"encoding/json"
"sync"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// MetadataStore caches REST-only Dataset/Table metadata that the engine
// catalog does not yet persist on the C++ side: `labels`,
// `defaultCollation`, `expirationTime`, `rangePartitioning`,
// `timePartitioning`, `clustering`, plus the smaller bookkeeping fields
// (`friendlyName`, `description`). The handler layer populates the
// store from Insert/Patch/Update bodies and reads it back in Get so
// client libraries observe the values they wrote.
//
// Lifetime: in-memory, per-gateway-process. Survives until the gateway
// restarts. For thirdparty test runs, the `THIRDPARTY_FRESH_VOLUME=1`
// path in `taskfiles/thirdparty.yml` wipes the engine volume on
// bringup, which intentionally aligns with the cache being empty at
// startup.
//
// Persistence is a separate (larger) plan: extending the engine
// `RegisterTable` / `DescribeTable` protos and the on-disk meta
// sidecar so values survive restart. Until then the in-memory store is
// the minimum-viable round-trip and is gated on the gateway speaking
// to the engine; gateway-only (`--engine_binary=""`) modes keep the
// echo posture they had before.
//
// Thread-safety: protected by an RWMutex. Lookups (the hot path for
// list samples) take the read lock; mutations take the write lock.
//
// All methods tolerate a nil receiver and behave as an empty store.
type MetadataStore struct {
// mu guards both maps below.
mu sync.RWMutex
// tables is keyed by tableKey ("project:dataset.table").
tables map[string]bqtypes.Table
// datasets is keyed by datasetKey ("project:dataset").
datasets map[string]bqtypes.Dataset
}
// NewMetadataStore builds a MetadataStore whose table and dataset maps are
// allocated and empty, ready for concurrent use.
func NewMetadataStore() *MetadataStore {
	store := new(MetadataStore)
	store.tables = make(map[string]bqtypes.Table)
	store.datasets = make(map[string]bqtypes.Dataset)
	return store
}
// tableKey builds the map key for a table entry in the form
// "project:dataset.table".
func tableKey(projectID, datasetID, tableID string) string {
	key := make([]byte, 0, len(projectID)+len(datasetID)+len(tableID)+2)
	key = append(key, projectID...)
	key = append(key, ':')
	key = append(key, datasetID...)
	key = append(key, '.')
	key = append(key, tableID...)
	return string(key)
}
// datasetKey builds the map key for a dataset entry in the form
// "project:dataset".
func datasetKey(projectID, datasetID string) string {
	key := make([]byte, 0, len(projectID)+len(datasetID)+1)
	key = append(key, projectID...)
	key = append(key, ':')
	key = append(key, datasetID...)
	return string(key)
}
// PutTable replaces the cached entry for a table with the fields that
// stripEngineOwnedTableFields retains from t. The creation time of an
// existing entry is carried over and the last-modified time is refreshed
// (see bumpTableTimestamps). A nil store ignores the call.
func (s *MetadataStore) PutTable(projectID, datasetID, tableID string, t bqtypes.Table) {
	if s == nil {
		return
	}
	key := tableKey(projectID, datasetID, tableID)

	s.mu.Lock()
	defer s.mu.Unlock()

	previous, found := s.tables[key]
	entry := stripEngineOwnedTableFields(t)
	bumpTableTimestamps(&entry, previous, found)
	s.tables[key] = entry
}
// MergeTable layers the sparse fields of patch over the cached entry for a
// table (starting from an empty entry when none is cached), then refreshes
// the timestamps. A nil store ignores the call.
func (s *MetadataStore) MergeTable(projectID, datasetID, tableID string, patch bqtypes.Table) {
	if s == nil {
		return
	}
	key := tableKey(projectID, datasetID, tableID)

	s.mu.Lock()
	defer s.mu.Unlock()

	current := s.tables[key]
	updated := mergeTableMetadataOverlay(current, stripEngineOwnedTableFields(patch))
	// The prior entry is always treated as present so its creation time,
	// when set, wins over anything the patch supplied.
	bumpTableTimestamps(&updated, current, true)
	s.tables[key] = updated
}
// GetTable looks up the cached REST-only metadata for a table. The bool is
// false when nothing is cached or the store is nil. Callers must combine
// the result with the engine's DescribeTable response themselves to build
// the full GET shape.
func (s *MetadataStore) GetTable(projectID, datasetID, tableID string) (bqtypes.Table, bool) {
	if s == nil {
		return bqtypes.Table{}, false
	}
	key := tableKey(projectID, datasetID, tableID)

	s.mu.RLock()
	cached, found := s.tables[key]
	s.mu.RUnlock()

	return cached, found
}
// DeleteTable drops the cached entry for a table so that a later Insert
// reusing the same ID does not surface stale metadata. A nil store ignores
// the call.
func (s *MetadataStore) DeleteTable(projectID, datasetID, tableID string) {
	if s == nil {
		return
	}
	key := tableKey(projectID, datasetID, tableID)

	s.mu.Lock()
	delete(s.tables, key)
	s.mu.Unlock()
}
// RestMetadataJSON serializes the cached REST-only metadata of a dataset
// to a JSON string. It returns "" when the store is nil, when the dataset
// has no cached entry, or when marshaling fails.
func (s *MetadataStore) RestMetadataJSON(projectID, datasetID string) string {
	// GetDataset already handles the nil receiver and the read lock.
	cached, found := s.GetDataset(projectID, datasetID)
	if !found {
		return ""
	}
	encoded, err := json.Marshal(stripEngineOwnedDatasetFields(cached))
	if err != nil {
		return ""
	}
	return string(encoded)
}
// RestoreDatasetRestMetadataJSON re-installs gateway REST metadata for a
// dataset from the JSON carried in an engine tombstone snapshot
// (`UndeleteDatasetResponse.rest_metadata_json`). A nil store, an empty
// string, or JSON that fails to decode leaves the store untouched.
func (s *MetadataStore) RestoreDatasetRestMetadataJSON(
	projectID, datasetID, restMetadataJSON string,
) {
	if s == nil || restMetadataJSON == "" {
		return
	}
	restored := bqtypes.Dataset{}
	if json.Unmarshal([]byte(restMetadataJSON), &restored) == nil {
		s.PutDataset(projectID, datasetID, restored)
	}
}
// PutDataset replaces the cached entry for a dataset with the fields that
// stripEngineOwnedDatasetFields retains from ds. The creation time of an
// existing entry is carried over and the last-modified time is refreshed
// (see bumpDatasetTimestamps). A nil store ignores the call.
func (s *MetadataStore) PutDataset(projectID, datasetID string, ds bqtypes.Dataset) {
	if s == nil {
		return
	}
	key := datasetKey(projectID, datasetID)

	s.mu.Lock()
	defer s.mu.Unlock()

	previous, found := s.datasets[key]
	entry := stripEngineOwnedDatasetFields(ds)
	bumpDatasetTimestamps(&entry, previous, found)
	s.datasets[key] = entry
}
// MergeDataset layers the sparse fields of patch over the cached entry for
// a dataset (starting from an empty entry when none is cached), then
// refreshes the timestamps. A nil store ignores the call.
func (s *MetadataStore) MergeDataset(projectID, datasetID string, patch bqtypes.Dataset) {
	if s == nil {
		return
	}
	key := datasetKey(projectID, datasetID)

	s.mu.Lock()
	defer s.mu.Unlock()

	current := s.datasets[key]
	updated := mergeDatasetMetadataOverlay(current, stripEngineOwnedDatasetFields(patch))
	// The prior entry is always treated as present so its creation time,
	// when set, is kept.
	bumpDatasetTimestamps(&updated, current, true)
	s.datasets[key] = updated
}
// GetDataset looks up the cached REST-only metadata for a dataset. The
// bool is false when nothing is cached or the store is nil.
func (s *MetadataStore) GetDataset(projectID, datasetID string) (bqtypes.Dataset, bool) {
	if s == nil {
		return bqtypes.Dataset{}, false
	}
	key := datasetKey(projectID, datasetID)

	s.mu.RLock()
	cached, found := s.datasets[key]
	s.mu.RUnlock()

	return cached, found
}
// DeleteDataset drops the cached entry for a dataset. Per-table entries
// are left alone: DatasetDelete with `deleteContents=true` clears those
// explicitly because the handler knows the dataset's tables. A nil store
// ignores the call.
func (s *MetadataStore) DeleteDataset(projectID, datasetID string) {
	if s == nil {
		return
	}
	key := datasetKey(projectID, datasetID)

	s.mu.Lock()
	delete(s.datasets, key)
	s.mu.Unlock()
}
// DeleteTablesInDataset drops every cached table entry whose key starts
// with "project:dataset." followed by at least one more character. It is
// called from DatasetDelete when the caller sets `deleteContents=true`, so
// a dataset recreated under the same ID does not inherit stale table
// metadata. A nil store ignores the call.
func (s *MetadataStore) DeleteTablesInDataset(projectID, datasetID string) {
	if s == nil {
		return
	}
	prefix := projectID + ":" + datasetID + "."
	prefixLen := len(prefix)

	s.mu.Lock()
	defer s.mu.Unlock()

	for key := range s.tables {
		if len(key) <= prefixLen {
			continue
		}
		if key[:prefixLen] == prefix {
			delete(s.tables, key)
		}
	}
}
// stripEngineOwnedTableFields returns a copy of t restricted to the fields
// the metadata store is allowed to cache. Every field not assigned below
// is left at its zero value, so a PATCH that echoes a prior GET cannot
// store engine-owned data. The creation and last-modified times are
// retained, and the schema is reduced to whatever
// bqtypes.ExtractSchemaPolicyOverlay keeps of it.
func stripEngineOwnedTableFields(t bqtypes.Table) bqtypes.Table {
	var kept bqtypes.Table

	// Descriptive fields.
	kept.FriendlyName = t.FriendlyName
	kept.Description = t.Description
	kept.Labels = t.Labels
	kept.ExpirationTime = t.ExpirationTime

	// Physical layout hints.
	kept.RangePartitioning = t.RangePartitioning
	kept.TimePartitioning = t.TimePartitioning
	kept.Clustering = t.Clustering

	// Defaults and extended attributes.
	kept.DefaultCollation = t.DefaultCollation
	kept.DefaultCollationSet = t.DefaultCollationSet
	kept.DefaultRoundingMode = t.DefaultRoundingMode
	kept.CaseInsensitive = t.CaseInsensitive
	kept.ResourceTags = t.ResourceTags
	kept.TableConstraints = t.TableConstraints
	kept.Replicas = t.Replicas
	kept.CreationTime = t.CreationTime
	kept.LastModifiedTime = t.LastModifiedTime

	// Table definition.
	kept.Type = t.Type
	kept.View = t.View
	kept.MaterializedView = t.MaterializedView
	kept.RequirePartitionFilter = t.RequirePartitionFilter
	kept.ExternalDataConfiguration = t.ExternalDataConfiguration
	kept.EncryptionConfiguration = t.EncryptionConfiguration
	kept.Schema = bqtypes.ExtractSchemaPolicyOverlay(t.Schema)

	return kept
}
// stripEngineOwnedDatasetFields returns a copy of ds restricted to the
// fields the metadata store is allowed to cache. Every field not assigned
// below is left at its zero value.
func stripEngineOwnedDatasetFields(ds bqtypes.Dataset) bqtypes.Dataset {
	var kept bqtypes.Dataset

	// Descriptive fields.
	kept.FriendlyName = ds.FriendlyName
	kept.Description = ds.Description
	kept.Location = ds.Location
	kept.Access = ds.Access
	kept.Labels = ds.Labels

	// Dataset-level defaults.
	kept.DefaultTableExpirationMs = ds.DefaultTableExpirationMs
	kept.DefaultPartitionExpirationMs = ds.DefaultPartitionExpirationMs
	kept.DefaultCollation = ds.DefaultCollation
	kept.DefaultCollationSet = ds.DefaultCollationSet
	kept.DefaultRoundingMode = ds.DefaultRoundingMode
	kept.MaxTimeTravelHours = ds.MaxTimeTravelHours
	kept.IsCaseInsensitive = ds.IsCaseInsensitive

	// Extended attributes and timestamps.
	kept.ResourceTags = ds.ResourceTags
	kept.Replicas = ds.Replicas
	kept.CreationTime = ds.CreationTime
	kept.LastModifiedTime = ds.LastModifiedTime

	return kept
}
// applyTableMetadataOverlay returns base with every non-empty field of
// overlay copied on top. Fields that are empty or nil in overlay leave the
// corresponding base value alone, and fields this function (and the two
// overlay helpers it calls) never touch pass through from base unchanged.
// The default collation is copied only when overlay marks it as set.
func applyTableMetadataOverlay(base bqtypes.Table, overlay bqtypes.Table) bqtypes.Table {
	if v := overlay.FriendlyName; v != "" {
		base.FriendlyName = v
	}
	if v := overlay.Description; v != "" {
		base.Description = v
	}
	if v := overlay.Labels; v != nil {
		base.Labels = v
	}
	if v := overlay.ExpirationTime; v != "" {
		base.ExpirationTime = v
	}
	if v := overlay.RangePartitioning; v != nil {
		base.RangePartitioning = v
	}
	if v := overlay.TimePartitioning; v != nil {
		base.TimePartitioning = v
	}
	if v := overlay.Clustering; v != nil {
		base.Clustering = v
	}
	if overlay.DefaultCollationSet {
		base.DefaultCollationSet = true
		base.DefaultCollation = overlay.DefaultCollation
	}

	overlayTableExtendedFields(&base, overlay)
	overlayTableDefinitionFields(&base, overlay)
	return base
}
// mergeTableMetadataOverlay returns the cached entry base with the sparse
// update patch applied. Empty strings and nil values in patch mean "not
// provided" and leave base alone, so PATCH bodies can carry only the
// fields being changed.
//
// Labels are the one field handled differently from
// applyTableMetadataOverlay: when the patch reports an explicit labels
// patch (LabelsPatchPresent), it is applied through
// bqtypes.ApplyLabelsPatch together with the patch's labels-to-delete;
// otherwise a non-nil Labels map replaces the cached one wholesale.
func mergeTableMetadataOverlay(base, patch bqtypes.Table) bqtypes.Table {
	if v := patch.FriendlyName; v != "" {
		base.FriendlyName = v
	}
	if v := patch.Description; v != "" {
		base.Description = v
	}

	switch {
	case patch.LabelsPatchPresent():
		base.Labels = bqtypes.ApplyLabelsPatch(
			base.Labels, true, patch.Labels, patch.LabelsToDelete(),
		)
	case patch.Labels != nil:
		base.Labels = patch.Labels
	}

	if v := patch.ExpirationTime; v != "" {
		base.ExpirationTime = v
	}
	if v := patch.RangePartitioning; v != nil {
		base.RangePartitioning = v
	}
	if v := patch.TimePartitioning; v != nil {
		base.TimePartitioning = v
	}
	if v := patch.Clustering; v != nil {
		base.Clustering = v
	}
	if patch.DefaultCollationSet {
		base.DefaultCollationSet = true
		base.DefaultCollation = patch.DefaultCollation
	}

	overlayTableExtendedFields(&base, patch)
	overlayTableDefinitionFields(&base, patch)
	return base
}
// overlayTableExtendedFields copies the non-empty extended attributes of
// src (rounding mode, case-insensitivity, resource tags, constraints,
// replicas, and both timestamps) into base, leaving base untouched for
// every attribute that is empty or nil in src.
func overlayTableExtendedFields(base *bqtypes.Table, src bqtypes.Table) {
	if v := src.DefaultRoundingMode; v != "" {
		base.DefaultRoundingMode = v
	}
	if v := src.CaseInsensitive; v != nil {
		base.CaseInsensitive = v
	}
	if v := src.ResourceTags; v != nil {
		base.ResourceTags = v
	}
	if v := src.TableConstraints; v != nil {
		base.TableConstraints = v
	}
	if v := src.Replicas; v != nil {
		base.Replicas = v
	}
	if v := src.CreationTime; v != "" {
		base.CreationTime = v
	}
	if v := src.LastModifiedTime; v != "" {
		base.LastModifiedTime = v
	}
}
// overlayTableDefinitionFields copies the non-empty table-definition
// attributes of src (type, view, materialized view, partition-filter
// requirement, external data and encryption configuration) into base. A
// non-nil src schema is not copied verbatim; it is combined with the base
// schema through bqtypes.MergeSchemaPolicyTags.
func overlayTableDefinitionFields(base *bqtypes.Table, src bqtypes.Table) {
	if v := src.Type; v != "" {
		base.Type = v
	}
	if v := src.View; v != nil {
		base.View = v
	}
	if v := src.MaterializedView; v != nil {
		base.MaterializedView = v
	}
	if v := src.RequirePartitionFilter; v != nil {
		base.RequirePartitionFilter = v
	}
	if v := src.ExternalDataConfiguration; v != nil {
		base.ExternalDataConfiguration = v
	}
	if v := src.EncryptionConfiguration; v != nil {
		base.EncryptionConfiguration = v
	}
	if src.Schema != nil {
		base.Schema = bqtypes.MergeSchemaPolicyTags(base.Schema, src.Schema)
	}
}
// applyDatasetMetadataOverlay returns base with every non-empty field of
// overlay copied on top. Fields that are empty or nil in overlay leave the
// corresponding base value alone. The default collation is copied only
// when overlay marks it as set.
func applyDatasetMetadataOverlay(base bqtypes.Dataset, overlay bqtypes.Dataset) bqtypes.Dataset {
	if v := overlay.FriendlyName; v != "" {
		base.FriendlyName = v
	}
	if v := overlay.Description; v != "" {
		base.Description = v
	}
	if v := overlay.Location; v != "" {
		base.Location = v
	}
	if v := overlay.Access; v != nil {
		base.Access = v
	}
	if v := overlay.Labels; v != nil {
		base.Labels = v
	}
	if v := overlay.DefaultTableExpirationMs; v != "" {
		base.DefaultTableExpirationMs = v
	}
	if v := overlay.DefaultPartitionExpirationMs; v != "" {
		base.DefaultPartitionExpirationMs = v
	}
	if overlay.DefaultCollationSet {
		base.DefaultCollationSet = true
		base.DefaultCollation = overlay.DefaultCollation
	}
	if v := overlay.DefaultRoundingMode; v != "" {
		base.DefaultRoundingMode = v
	}
	if v := overlay.MaxTimeTravelHours; v != "" {
		base.MaxTimeTravelHours = v
	}
	if v := overlay.IsCaseInsensitive; v != nil {
		base.IsCaseInsensitive = v
	}
	if v := overlay.ResourceTags; v != nil {
		base.ResourceTags = v
	}
	if v := overlay.Replicas; v != nil {
		base.Replicas = v
	}
	if v := overlay.CreationTime; v != "" {
		base.CreationTime = v
	}
	if v := overlay.LastModifiedTime; v != "" {
		base.LastModifiedTime = v
	}
	return base
}
// mergeDatasetMetadataOverlay returns the cached entry base with the
// sparse update patch applied. Empty strings and nil values in patch leave
// base alone. Labels go through bqtypes.ApplyLabelsPatch when the patch
// reports an explicit labels patch; otherwise a non-nil Labels map
// replaces the cached one. The timestamps are not copied from patch here.
func mergeDatasetMetadataOverlay(base, patch bqtypes.Dataset) bqtypes.Dataset {
	if v := patch.FriendlyName; v != "" {
		base.FriendlyName = v
	}
	if v := patch.Description; v != "" {
		base.Description = v
	}
	if v := patch.Location; v != "" {
		base.Location = v
	}
	if v := patch.Access; v != nil {
		base.Access = v
	}

	switch {
	case patch.LabelsPatchPresent():
		base.Labels = bqtypes.ApplyLabelsPatch(
			base.Labels, true, patch.Labels, patch.LabelsToDelete(),
		)
	case patch.Labels != nil:
		base.Labels = patch.Labels
	}

	if v := patch.DefaultTableExpirationMs; v != "" {
		base.DefaultTableExpirationMs = v
	}
	if v := patch.DefaultPartitionExpirationMs; v != "" {
		base.DefaultPartitionExpirationMs = v
	}
	if patch.DefaultCollationSet {
		base.DefaultCollationSet = true
		base.DefaultCollation = patch.DefaultCollation
	}
	if v := patch.DefaultRoundingMode; v != "" {
		base.DefaultRoundingMode = v
	}
	if v := patch.MaxTimeTravelHours; v != "" {
		base.MaxTimeTravelHours = v
	}
	if v := patch.IsCaseInsensitive; v != nil {
		base.IsCaseInsensitive = v
	}
	if v := patch.ResourceTags; v != nil {
		base.ResourceTags = v
	}
	if v := patch.Replicas; v != nil {
		base.Replicas = v
	}
	return base
}
// bumpDatasetTimestamps settles the timestamps on a dataset entry that is
// about to be stored. The creation time of an existing entry, when set,
// always wins; otherwise a creation time already present on stored is
// kept, and only a missing one is filled with the current time. The
// last-modified time is always set to the current time.
func bumpDatasetTimestamps(stored *bqtypes.Dataset, existing bqtypes.Dataset, hadExisting bool) {
	switch {
	case hadExisting && existing.CreationTime != "":
		stored.CreationTime = existing.CreationTime
	case stored.CreationTime == "":
		stored.CreationTime = nowMillis()
	}
	stored.LastModifiedTime = nowMillis()
}
// bumpTableTimestamps settles the timestamps on a table entry that is
// about to be stored. The creation time of an existing entry, when set,
// always wins; otherwise a creation time already present on stored is
// kept, and only a missing one is filled with the current time. The
// last-modified time is always set to the current time.
func bumpTableTimestamps(stored *bqtypes.Table, existing bqtypes.Table, hadExisting bool) {
	switch {
	case hadExisting && existing.CreationTime != "":
		stored.CreationTime = existing.CreationTime
	case stored.CreationTime == "":
		stored.CreationTime = nowMillis()
	}
	stored.LastModifiedTime = nowMillis()
}
package handlers
import (
"crypto/rand"
"encoding/hex"
"encoding/json"
"io"
"net/http"
"sort"
"strings"
"sync"
"time"
)
// BigQuery Migration v2alpha REST shell.
//
// The upstream BigQuery Migration API runs at
// `https://bigquerymigration.googleapis.com/v2alpha/...` (and the
// v2 alias at the same host). Client libraries
// (cloud.google.com/go/bigquery/migration/apiv2alpha,
// google-cloud-bigquery-migration for Python/Node/Java) read
// `BIGQUERY_MIGRATION_EMULATOR_HOST` and fall back to
// `BIGQUERY_EMULATOR_HOST` so this gateway can serve both surfaces
// from the same listener.
//
// This shell keeps workflow metadata in an in-process sync.Map store
// (no AST translator, no LRO store, no subtask catalog). Create
// returns a DRAFT workflow; :start transitions it to RUNNING so
// client startup probes get structurally-valid responses.
//
// Routes registered (for both `v2alpha` and `v2`):
// GET /{ver}/projects/{projectId}/locations/{location}/workflows
// POST /{ver}/projects/{projectId}/locations/{location}/workflows
// GET /{ver}/projects/{projectId}/locations/{location}/workflows/{workflowId}
// DELETE /{ver}/projects/{projectId}/locations/{location}/workflows/{workflowId}
// POST /{ver}/projects/{projectId}/locations/{location}/workflows/{workflowId}:start
// (dispatched on trailing :start via MigrationWorkflowCustomMethodPOST,
// because net/http's mux can't match `{workflowId}:start` directly.)
const (
// migrationWorkflowStateDraft is the state MigrationWorkflowCreate
// assigns to a newly created workflow.
migrationWorkflowStateDraft = "DRAFT"
// migrationWorkflowStateRunning is the state a DRAFT workflow moves to
// when the :start custom method is invoked.
migrationWorkflowStateRunning = "RUNNING"
)
// migrationWorkflowStore is the package-level, in-process registry of
// migration workflows shared by every handler in this file.
var migrationWorkflowStore sync.Map // canonical name -> *migrationWorkflowResource
// migrationWorkflowResource is the workflow record kept in
// migrationWorkflowStore and the JSON shape returned by the workflow
// handlers.
type migrationWorkflowResource struct {
// Name is the canonical resource name:
// projects/{project}/locations/{location}/workflows/{id}.
Name string `json:"name"`
// DisplayName is copied from the create request body.
DisplayName string `json:"displayName,omitempty"`
// State is one of the migrationWorkflowState* constants.
State string `json:"state,omitempty"`
// CreateTime is the creation instant as produced by migrationWorkflowNow.
CreateTime string `json:"createTime,omitempty"`
// LastUpdateTime is refreshed when the workflow is started.
LastUpdateTime string `json:"lastUpdateTime,omitempty"`
}
func migrationWorkflowParent(r *http.Request) string {
return "projects/" + r.PathValue("projectId") +
"/locations/" + r.PathValue("location")
}
// migrationWorkflowNow returns the current UTC time formatted as
// RFC 3339 with nanosecond precision.
func migrationWorkflowNow() string {
	now := time.Now().UTC()
	return now.Format(time.RFC3339Nano)
}
// migrationWorkflowMintID returns a fresh workflow ID: 8 bytes read from
// crypto/rand, hex-encoded into a 16-character string.
func migrationWorkflowMintID() string {
	raw := make([]byte, 8)
	// The read error is deliberately ignored, as in the original shell.
	_, _ = rand.Read(raw)
	return hex.EncodeToString(raw)
}
// migrationWorkflowByName fetches the workflow stored under the canonical
// resource name. It reports false when nothing is stored under name or
// when the stored value is not a non-nil *migrationWorkflowResource.
func migrationWorkflowByName(name string) (*migrationWorkflowResource, bool) {
	if stored, found := migrationWorkflowStore.Load(name); found {
		if wf, isWorkflow := stored.(*migrationWorkflowResource); isWorkflow && wf != nil {
			return wf, true
		}
	}
	return nil, false
}
// MigrationWorkflowList implements `migration.workflows.list`. It returns
// every stored workflow whose name falls under the request's
// project/location parent, sorted by name, as
// {"migrationWorkflows": [...]}.
func MigrationWorkflowList(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		prefix := migrationWorkflowParent(r) + "/workflows/"

		var matched []migrationWorkflowResource
		migrationWorkflowStore.Range(func(key, value any) bool {
			name, _ := key.(string)
			wf, _ := value.(*migrationWorkflowResource)
			if wf != nil && strings.HasPrefix(name, prefix) {
				matched = append(matched, *wf)
			}
			return true // keep scanning the whole store
		})
		sort.Slice(matched, func(a, b int) bool {
			return matched[a].Name < matched[b].Name
		})

		// Always a non-nil slice so an empty listing encodes as [].
		workflows := make([]any, 0, len(matched))
		for _, wf := range matched {
			workflows = append(workflows, wf)
		}
		writeJSON(w, http.StatusOK, map[string]any{
			"migrationWorkflows": workflows,
		})
	}
}
// MigrationWorkflowCreate implements `migration.workflows.create`. It
// reads an optional JSON body (only displayName is carried over), mints a
// new workflow ID under the request's project/location, stores the
// workflow in the DRAFT state, and returns it with HTTP 200. An unreadable
// body or invalid JSON yields HTTP 400.
func MigrationWorkflowCreate(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		payload, readErr := io.ReadAll(r.Body)
		if readErr != nil {
			writeError(w, http.StatusBadRequest, reasonInvalid, "invalid body")
			return
		}
		_ = r.Body.Close()

		var requested migrationWorkflowResource
		if strings.TrimSpace(string(payload)) != "" {
			if decodeErr := json.Unmarshal(payload, &requested); decodeErr != nil {
				writeError(w, http.StatusBadRequest, reasonInvalid,
					"invalid json: "+decodeErr.Error())
				return
			}
		}

		workflowID := migrationWorkflowMintID()
		name := migrationWorkflowParent(r) + "/workflows/" + workflowID
		stamp := migrationWorkflowNow()

		created := migrationWorkflowResource{
			Name:           name,
			DisplayName:    requested.DisplayName,
			State:          migrationWorkflowStateDraft,
			CreateTime:     stamp,
			LastUpdateTime: stamp,
		}
		migrationWorkflowStore.Store(name, &created)
		writeJSON(w, http.StatusOK, created)
	}
}
// MigrationWorkflowGet implements `migration.workflows.get`. It returns
// the stored workflow named by the request path, or HTTP 404 when no such
// workflow exists.
func MigrationWorkflowGet(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		name := migrationWorkflowName(r)
		if wf, found := migrationWorkflowByName(name); found {
			writeJSON(w, http.StatusOK, *wf)
			return
		}
		writeError(w, http.StatusNotFound, reasonNotFound,
			"Not found: MigrationWorkflow "+name)
	}
}
// MigrationWorkflowDelete implements `migration.workflows.delete`. It
// removes the stored workflow named by the request path and answers with
// an empty JSON object, or HTTP 404 when no such workflow exists.
func MigrationWorkflowDelete(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		name := migrationWorkflowName(r)
		_, found := migrationWorkflowByName(name)
		if !found {
			writeError(w, http.StatusNotFound, reasonNotFound,
				"Not found: MigrationWorkflow "+name)
			return
		}
		migrationWorkflowStore.Delete(name)
		writeJSON(w, http.StatusOK, struct{}{})
	}
}
// MigrationWorkflowCustomMethodPOST dispatches the AIP-136 ":start"
// custom method that hangs off a workflow resource.
//
// The operation is taken from the text after the colon in the
// {workflowId} path capture:
//
//   - "start": a DRAFT workflow becomes RUNNING (and its last-update time
//     is refreshed), a RUNNING workflow is left as is, and any other state
//     is rejected with HTTP 400. An unknown workflow yields HTTP 404.
//     Success is an empty JSON object.
//   - no operation: HTTP 405, because a plain POST on a workflow resource
//     is not allowed.
//   - anything else: HTTP 404 for an unknown custom method.
func MigrationWorkflowCustomMethodPOST(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		_, op := splitColonOp(r.PathValue("workflowId"))
		switch op {
		case "start":
			name := migrationWorkflowName(r)
			wf, ok := migrationWorkflowByName(name)
			if !ok {
				writeError(w, http.StatusNotFound, reasonNotFound,
					"Not found: MigrationWorkflow "+name)
				return
			}
			switch wf.State {
			case migrationWorkflowStateDraft:
				// Copy-on-write: the stored pointer is shared with
				// concurrent Get/List handlers that copy *wf without a
				// lock, so mutating it in place would be a data race.
				// Publish an updated copy under the same key instead.
				started := *wf
				started.State = migrationWorkflowStateRunning
				started.LastUpdateTime = migrationWorkflowNow()
				migrationWorkflowStore.Store(name, &started)
			case migrationWorkflowStateRunning:
				// Already running: starting again is a no-op.
			default:
				writeError(w, http.StatusBadRequest, reasonFailedPrecondition,
					"MigrationWorkflow "+name+" is not in DRAFT or RUNNING state")
				return
			}
			writeJSON(w, http.StatusOK, struct{}{})
		case "":
			writeError(w, http.StatusMethodNotAllowed, reasonInvalid,
				"POST is not allowed on a workflow resource. "+
					"Use POST .../workflows to create or :start to start.")
		default:
			writeError(w, http.StatusNotFound, reasonNotFound,
				"Unknown migration workflow custom method ':"+op+"'.")
		}
	}
}
// migrationWorkflowName rebuilds the canonical workflow resource name from
// the request's path captures so lookups and error text use the same
// name. Any ":op" suffix on the {workflowId} capture is split off and
// the remaining ID is trimmed of surrounding whitespace.
func migrationWorkflowName(r *http.Request) string {
	workflowID, _ := splitColonOp(r.PathValue("workflowId"))
	project := r.PathValue("projectId")
	location := r.PathValue("location")
	return "projects/" + project +
		"/locations/" + location +
		"/workflows/" + strings.TrimSpace(workflowID)
}
package handlers
import (
"context"
"net/http"
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/models"
)
// modelListKind is the `kind` field for a models.list response; ModelList
// emits it under resourceKeyKind. See
// docs/bigquery/docs/reference/rest/v2/models/list.md.
const modelListKind = "bigquery#listModelsResponse"
// modelStore returns the model store held by deps, creating and assigning
// a new one first when deps has none. The lazy assignment is not
// synchronized, so callers sharing one *Dependencies across goroutines
// should resolve the store before serving requests.
func modelStore(deps *Dependencies) *models.Store {
	if existing := deps.Models; existing != nil {
		return existing
	}
	deps.Models = models.NewStore()
	return deps.Models
}
func modelIDFromPath(r *http.Request) (projectID, datasetID, modelID string) {
return r.PathValue("projectId"), r.PathValue("datasetId"), r.PathValue("modelId")
}
// modelListEntry trims a model down to the summary returned in list
// responses: its reference, type, creation and last-modified times, and
// labels. All other fields are left at their zero value.
func modelListEntry(m bqtypes.Model) bqtypes.Model {
	var summary bqtypes.Model
	summary.ModelReference = m.ModelReference
	summary.ModelType = m.ModelType
	summary.CreationTime = m.CreationTime
	summary.LastModifiedTime = m.LastModifiedTime
	summary.Labels = m.Labels
	return summary
}
// ModelList implements `bigquery.models.list`.
//
// Query parameters:
//   - filter: passed through to the model store's List.
//   - maxResults: when it parses as a non-negative integer smaller than
//     the number of remaining models, the page is truncated to that many
//     entries and a nextPageToken is returned.
//   - pageToken: the offset previously handed out as nextPageToken; the
//     listing resumes from there. A token that is not an integer in
//     [0, number of models] is rejected with HTTP 400.
//
// Page tokens are plain offsets into the store's listing, so stable
// paging assumes List returns models in a consistent order between calls
// (NOTE(review): confirm against models.Store.List).
//
// The model store is resolved once, when the handler is built, rather than
// lazily on every request: the lazy path writes to the captured deps and
// would race between concurrent requests.
func ModelList(deps Dependencies) http.HandlerFunc {
	store := modelStore(&deps)
	return func(w http.ResponseWriter, r *http.Request) {
		query := r.URL.Query()
		projectID := r.PathValue("projectId")
		datasetID := r.PathValue("datasetId")

		all := store.List(projectID, datasetID, query.Get("filter"))
		items := make([]bqtypes.Model, 0, len(all))
		for _, m := range all {
			items = append(items, modelListEntry(m))
		}

		// Honor the offset we previously returned as nextPageToken; without
		// this a paginating client would be served the first page forever.
		offset := 0
		if token := query.Get("pageToken"); token != "" {
			parsed, err := strconv.Atoi(token)
			if err != nil || parsed < 0 || parsed > len(items) {
				writeError(w, http.StatusBadRequest, reasonInvalid,
					"Invalid pageToken: "+token)
				return
			}
			offset = parsed
		}
		items = items[offset:]

		resp := map[string]any{
			resourceKeyKind: modelListKind,
			"models":        items,
		}
		if maxResults := query.Get("maxResults"); maxResults != "" {
			if n, err := strconv.Atoi(maxResults); err == nil && n >= 0 && n < len(items) {
				resp["models"] = items[:n]
				// The token is an absolute offset into the full listing.
				resp["nextPageToken"] = strconv.Itoa(offset + n)
			}
		}
		writeJSON(w, http.StatusOK, resp)
	}
}
// ModelGet implements `bigquery.models.get`. It looks the model up in the
// store by (project, dataset, model) and writes it back as JSON, or a
// 404 notFound envelope when the store has no such model.
func ModelGet(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, modelID := modelIDFromPath(r)
		if found, ok := modelStore(&deps).Get(projectID, datasetID, modelID); ok {
			writeJSON(w, http.StatusOK, found)
			return
		}
		writeError(w, http.StatusNotFound, reasonNotFound,
			"Not found: Model "+projectID+":"+datasetID+"."+modelID)
	}
}
// ModelPatch implements `bigquery.models.patch`. Patching is not
// supported yet: every request is answered by NotImplemented and deps
// is unused.
func ModelPatch(deps Dependencies) http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		NotImplemented(w, r)
	})
}
// ModelDelete implements `bigquery.models.delete`. A successful delete
// answers 200 with an empty body; when the store reports nothing was
// deleted the handler writes a 404 notFound envelope.
func ModelDelete(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, modelID := modelIDFromPath(r)
		deleted := modelStore(&deps).Delete(projectID, datasetID, modelID)
		if deleted {
			w.WriteHeader(http.StatusOK)
			return
		}
		writeError(w, http.StatusNotFound, reasonNotFound,
			"Not found: Model "+projectID+":"+datasetID+"."+modelID)
	}
}
// persistModelFromDDL hands a CREATE MODEL statement to
// models.RegisterFromDDL so its metadata lands in the store attached to
// deps, and returns whatever reference that call produces. The context
// argument is currently unused.
func persistModelFromDDL(
	_ context.Context,
	deps *Dependencies,
	projectID, defaultDatasetID, sql string,
) *bqtypes.ModelReference {
	store := modelStore(deps)
	return models.RegisterFromDDL(store, projectID, defaultDatasetID, sql)
}
// isCreateModelSQL reports whether sql starts with a CREATE MODEL
// statement, optionally with OR REPLACE (IF NOT EXISTS is covered because
// it follows the MODEL keyword).
//
// Keywords are matched case-insensitively and may be separated by any run
// of whitespace (spaces, tabs, newlines), so multi-line or re-indented
// DDL is recognized the same way as the single-space form. The MODEL
// keyword is matched as a prefix of its token so input such as
// "CREATE MODEL`ds.m`" keeps matching.
func isCreateModelSQL(sql string) bool {
	tokens := strings.Fields(sql)
	if len(tokens) < 2 || !strings.EqualFold(tokens[0], "CREATE") {
		return false
	}
	tokens = tokens[1:]
	// Skip an optional OR REPLACE; the length guard guarantees a token
	// remains after the skip.
	if len(tokens) >= 3 &&
		strings.EqualFold(tokens[0], "OR") &&
		strings.EqualFold(tokens[1], "REPLACE") {
		tokens = tokens[2:]
	}
	const keyword = "MODEL"
	head := tokens[0]
	return len(head) >= len(keyword) && strings.EqualFold(head[:len(keyword)], keyword)
}
package handlers
import (
"net/http"
"os"
)
// Wire constants shared by the projects.* handlers.
const (
	// projectKind is the `kind` value of a single Project resource. See
	// docs/bigquery/docs/reference/rest/v2/projects/list.md.
	projectKind = "bigquery#project"

	// projectListKind is the `kind` value of a ProjectList response. See
	// docs/bigquery/docs/reference/rest/v2/projects/list.md.
	projectListKind = "bigquery#projectList"

	// serviceAccountKind is the `kind` value of a
	// GetServiceAccountResponse. See
	// docs/bigquery/docs/reference/rest/v2/projects/getServiceAccount.md.
	serviceAccountKind = "bigquery#getServiceAccountResponse"

	// defaultProjectEnvVar names the environment variable that overrides
	// the synthetic project ID returned by projects.list.
	defaultProjectEnvVar = "BIGQUERY_EMULATOR_PROJECT"

	// defaultProjectID is the synthetic project ID used when
	// defaultProjectEnvVar is unset or empty, so callers that never
	// configure a project still get a predictable value on the wire.
	defaultProjectID = "test-project"
)
// defaultProjectIDFromEnv resolves the synthetic project ID: the value
// of the defaultProjectEnvVar environment variable when it is set to a
// non-empty string, otherwise defaultProjectID. The environment is read
// on every call, so changing the variable takes effect without a
// restart.
func defaultProjectIDFromEnv() string {
	override := os.Getenv(defaultProjectEnvVar)
	if override == "" {
		return defaultProjectID
	}
	return override
}
// projectResource is the per-entry shape inside a ProjectList. The
// fields mirror docs/bigquery/docs/reference/rest/v2/projects/list.md.
// Field order is the JSON emission order, so keep it stable.
type projectResource struct {
// Kind is the resource discriminator; ProjectList sets it to projectKind.
Kind string `json:"kind"`
// ID is the project ID; ProjectList fills it with the same value as
// ProjectReference.ProjectID.
ID string `json:"id"`
// NumericID is omitted from the JSON when empty (ProjectList never sets it).
NumericID string `json:"numericId,omitempty"`
// ProjectReference is always emitted, even when zero-valued.
ProjectReference projectReference `json:"projectReference"`
// FriendlyName is omitted from the JSON when empty (ProjectList never sets it).
FriendlyName string `json:"friendlyName,omitempty"`
}
// projectReference is BigQuery's stable handle to a project (mirrors
// the `ProjectReference` resource referenced by projects.list).
type projectReference struct {
// ProjectID is serialized as `projectId` and is always present in the JSON.
ProjectID string `json:"projectId"`
}
// ProjectList implements `bigquery.projects.list`:
//
//	GET /bigquery/v2/projects
//
// The response always contains exactly one synthetic project whose ID
// comes from defaultProjectIDFromEnv (BIGQUERY_EMULATOR_PROJECT when
// set, `test-project` otherwise), with totalItems fixed at 1. Neither
// the dependencies nor the request are consulted.
func ProjectList(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, _ *http.Request) {
		id := defaultProjectIDFromEnv()
		only := projectResource{
			Kind:             projectKind,
			ID:               id,
			ProjectReference: projectReference{ProjectID: id},
		}
		body := map[string]any{
			resourceKeyKind:       projectListKind,
			resourceKeyProjects:   []projectResource{only},
			resourceKeyTotalItems: 1,
		}
		writeJSON(w, http.StatusOK, body)
	}
}
// ProjectGetServiceAccount implements
// `bigquery.projects.getServiceAccount`:
//
//	GET /bigquery/v2/projects/{projectId}/serviceAccount
//
// The emulator answers with a synthetic email of the form
// `bigquery-emulator@<projectId>.iam.gserviceaccount.com`. The project
// ID is taken from the path capture, falling back to
// defaultProjectIDFromEnv when the capture is empty.
func ProjectGetServiceAccount(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		id := r.PathValue("projectId")
		if id == "" {
			id = defaultProjectIDFromEnv()
		}
		email := "bigquery-emulator@" + id + ".iam.gserviceaccount.com"
		body := map[string]any{
			resourceKeyKind: serviceAccountKind,
			"email":         email,
		}
		writeJSON(w, http.StatusOK, body)
	}
}
package handlers
import (
"encoding/json"
"errors"
"io"
"net/http"
"strconv"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/middleware"
"github.com/vantaboard/bigquery-emulator/gateway/query"
)
// queryResponseKind is the value the BigQuery REST API returns for the
// `kind` field of a QueryResponse resource. See
// docs/bigquery/docs/reference/rest/v2/jobs/query.md. Both the dry-run
// path (runQueryDryRun) and the execute path (assembleQueryResponse)
// stamp it on QueryResponse.Kind.
const queryResponseKind = "bigquery#queryResponse"
// principalEmailFromContext returns the email of the principal that the
// middleware attached to the request context, or the fixed placeholder
// "emulator@bigquery.local" when no principal is present or its email
// is empty.
func principalEmailFromContext(r *http.Request) string {
	const fallback = "emulator@bigquery.local"
	principal, ok := middleware.PrincipalFromContext(r.Context())
	if !ok || principal.Email == "" {
		return fallback
	}
	return principal.Email
}
// statementTypeSelect is the engine-reported statement type for read
// queries. Promoted to a package constant so goconst does not flag the
// repeated literal across handlers. runQueryExecute compares the
// engine's trailing statement type against it (an empty type is treated
// the same way) before materializing an implicit destination table.
const statementTypeSelect = "SELECT"
// QueryRun implements `bigquery.jobs.query`:
//
//	POST /bigquery/v2/projects/{projectId}/queries
//
// The request body is a JSON QueryRequest (see gateway/bqtypes). The
// handler reads the whole body, decodes it when non-empty (an empty
// body leaves the request zero-valued), and then dispatches on
// `dryRun`:
//
//   - dryRun=true  -> runQueryDryRun
//   - otherwise    -> runQueryExecute
//
// A body that cannot be read or is not valid JSON is answered with a
// 400 "invalid" error envelope.
//
// When deps.Jobs is nil a fresh jobs.Registry is installed once, at
// handler construction time, so callers that pass a zero-valued
// Dependencies still get a working job store for the lifetime of the
// returned handler.
func QueryRun(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	return func(w http.ResponseWriter, r *http.Request) {
		raw, readErr := io.ReadAll(r.Body)
		if readErr != nil {
			writeError(w, http.StatusBadRequest, "invalid",
				"Could not read query request body: "+readErr.Error())
			return
		}

		var req bqtypes.QueryRequest
		if len(raw) != 0 {
			if parseErr := json.Unmarshal(raw, &req); parseErr != nil {
				writeError(w, http.StatusBadRequest, "invalid",
					"Could not parse query request body as JSON: "+parseErr.Error())
				return
			}
		}

		// Both branches share one signature, so pick the branch and
		// call it once.
		handle := runQueryExecute
		if req.DryRun {
			handle = runQueryDryRun
		}
		handle(deps, w, r, &req)
	}
}
// runQueryDryRun handles the dryRun=true branch of QueryRun.
//
// Steps, in order:
//
//  1. With no engine client (deps.Query == nil) answer via
//     NotImplemented.
//  2. Resolve the default dataset from the request (resolveDefaultDataset)
//     and let prepareQueryExternalTables adjust it for any
//     `tableDefinitions`; an error there is written by
//     writeExternalTableError.
//  3. Run the SQL through query.PrepareEngineSQL; legacy SQL is only
//     requested when `useLegacySql` is explicitly true.
//  4. Call the engine's DryRun RPC and translate a gRPC failure through
//     queryGRPCToHTTPError.
//  5. Reply with a QueryResponse carrying the analyzed schema, the
//     estimated bytes as a string, `jobComplete=true` and no rows.
//
// The engine request always has UseLegacySql=false and takes its
// project from the URL path.
func runQueryDryRun(deps Dependencies, w http.ResponseWriter, r *http.Request,
	req *bqtypes.QueryRequest,
) {
	if deps.Query == nil {
		NotImplemented(w, r)
		return
	}
	ctx := r.Context()
	projectID := r.PathValue("projectId")

	datasetHint := resolveDefaultDataset(deps, req.DefaultDataset)
	datasetHint, extErr := prepareQueryExternalTables(
		ctx, deps, projectID, req.TableDefinitions, datasetHint)
	if writeExternalTableError(w, extErr) {
		return
	}

	legacy := false
	if req.UseLegacySQL != nil {
		legacy = *req.UseLegacySQL
	}
	engineSQL, sqlErr := query.PrepareEngineSQL(legacy, req.Query, projectID, datasetHint)
	if writeLegacySQLError(w, sqlErr) {
		return
	}

	resp, err := deps.Query.DryRun(ctx, &enginepb.QueryRequest{
		ProjectId:        projectID,
		DefaultDatasetId: datasetHint,
		Sql:              engineSQL,
		UseLegacySql:     false,
		Parameters:       parametersToEngineMap(req.Parameters),
		PrincipalEmail:   principalEmailFromContext(r),
	})
	if queryGRPCToHTTPError(w, err) {
		return
	}

	writeJSON(w, http.StatusOK, bqtypes.QueryResponse{
		Kind:                queryResponseKind,
		Schema:              schemaFromProto(resp.GetSchema()),
		TotalBytesProcessed: formatDryRunBytes(resp.GetEstimatedBytesProcessed()),
		JobComplete:         true,
	})
}
// formatDryRunBytes turns the engine's estimated byte count into the
// string form placed in a dry-run response's totalBytesProcessed. The
// formatting itself is delegated to jobs.FormatDryRunBytesProcessed.
func formatDryRunBytes(estimated int64) string {
	rendered := jobs.FormatDryRunBytesProcessed(estimated)
	return rendered
}
// runQueryExecute handles the dryRun=false branch of QueryRun. It
// forwards the SQL to the engine's server-streaming
// `enginepb.Query.ExecuteQuery` RPC, drains the schema + row stream,
// marshals every row through `bqtypes.CellsToRow`, and stamps the
// resulting `QueryResponse` with a DONE jobReference recorded in
// `deps.Jobs`.
//
// Stream contract (mirrors the comment on proto QueryResultRow):
// the first message carries the schema; subsequent messages each
// carry one row's cells. The schema reader is defensive -- if a
// later message also sets `schema` it is ignored, and a message
// with neither schema nor cells contributes an empty row.
//
// When `deps.Query` is nil (the gateway was started without an
// engine subprocess), the handler degrades to the structured 501
// stub the rest of the route table uses; unit-mode runs (`task
// emulator:run --engine_binary=""`) keep returning a BigQuery-
// shaped error envelope instead of a panic.
//
// Two request shapes short-circuit before the single-statement engine
// call: session-abort statements (handleAbortSessionQuery) and
// multi-statement scripts (runQueryScriptExecute).
//
//nolint:funlen // engine stream drain + session/DDL stamping in one handler
func runQueryExecute(deps Dependencies, w http.ResponseWriter, r *http.Request,
req *bqtypes.QueryRequest,
) {
if deps.Query == nil {
NotImplemented(w, r)
return
}
projectID := r.PathValue("projectId")
// Session-abort statements are answered by handleAbortSessionQuery
// and never reach the engine call below.
if parseAbortSessionSQL(req.Query) {
handleAbortSessionQuery(deps, w, projectID, req.Location, req.ConnProperties)
return
}
// ok=false means the helper could not produce a dataset; it receives w,
// so presumably it has already written the error response -- confirm
// against queryDefaultDatasetForExecute.
defaultDataset, ok := queryDefaultDatasetForExecute(deps, w, r, projectID, req)
if !ok {
return
}
// Scripts are delegated wholesale; nothing below runs for them.
if isMultiStatementScript(req.Query) {
runQueryScriptExecute(deps, w, r, req, defaultDataset)
return
}
useLegacy := req.UseLegacySQL != nil && *req.UseLegacySQL
// NOTE(review): judging by the names, some parameters are inlined into
// the SQL text and then removed from the list that is bound on the
// engine request -- confirm against the two helpers.
sql := expandQueryParamsInSQL(req.Query, req.Parameters)
bindParams := stripExpandedArrayParams(req.Query, sql, req.Parameters)
sql, sqlErr := query.PrepareEngineSQL(useLegacy, sql, projectID, defaultDataset)
if writeLegacySQLError(w, sqlErr) {
return
}
sql, sqlErr = query.PrepareEngineSQLForJobs(r.Context(), deps.Catalog, deps.Jobs, projectID, sql)
if sqlErr != nil {
writeError(w, http.StatusBadRequest, reasonInvalidQuery, sqlErr.Error())
return
}
// The engine always receives UseLegacySql=false; the legacy flag was
// already consumed by PrepareEngineSQL above.
engineReq := &enginepb.QueryRequest{
ProjectId: projectID,
DefaultDatasetId: defaultDataset,
Sql: sql,
UseLegacySql: false,
Parameters: parametersToEngineMap(bindParams),
PrincipalEmail: principalEmailFromContext(r),
}
// start/end bracket the engine call plus the full stream drain.
start := time.Now().UTC()
stream, err := deps.Query.ExecuteQuery(r.Context(), engineReq)
if queryGRPCToHTTPError(w, err) {
return
}
schema, dmlStats, rows, statementType, emulatorRoute, emulatorPhases, ok := streamQueryResults(w, stream)
if !ok {
return
}
end := time.Now().UTC()
// Record the completed job (with its rows + schema cached)
// before assembling the response so the jobReference we emit
// is the same one a later jobs.get / jobs.getQueryResults will
// find. The current registry does not track engine-side
// bytes-processed yet, so we stamp 0; the long-running-jobs
// follow-up wires the real metric.
restSchema := schemaFromProto(schema)
if err := query.AppendResultsFromQueryRequest(
r.Context(), deps.Catalog, req, projectID, restSchema, rows); err != nil {
writeError(w, http.StatusBadRequest, reasonInvalidQuery, err.Error())
return
}
restDmlStats := dmlStatsFromProto(dmlStats)
// ddlTarget stays nil unless the engine reported a routine-creating
// statement type.
var ddlTarget *bqtypes.RoutineReference
if statementType == "CREATE_FUNCTION" || statementType == "CREATE_PROCEDURE" ||
statementType == "CREATE_TABLE_FUNCTION" {
ddlTarget = persistRoutineFromDDL(
r.Context(), &deps, projectID, defaultDataset, req.Query)
}
// Model DDL is detected from the original SQL text rather than from
// statementType; the returned reference is not used here.
if isCreateModelSQL(req.Query) {
persistModelFromDDL(r.Context(), &deps, projectID, defaultDataset, req.Query)
}
handleViewDDLAfterQuery(&deps, projectID, defaultDataset, req.Query, statementType)
// The cached result keeps the unfiltered emulatorRoute / phases; the
// loopback gating further down applies only to this HTTP response.
result := &jobs.QueryResult{
Schema: restSchema,
Rows: rows,
DmlStats: restDmlStats,
StatementType: statementType,
EmulatorRoute: emulatorRoute,
EmulatorPhases: emulatorPhases,
DdlTargetRoutine: ddlTarget,
}
sessionInfo := sessionStore(&deps).Resolve(
projectID, req.Location, req.CreateSession, req.ConnProperties)
job := deps.Jobs.CompleteQueryWithResult(
projectID, req.Location, 0, start, end, result)
job.UserEmail = principalEmailFromContext(r)
// Only SELECT-like statements (or an unreported type) that produced
// rows get an implicit destination table. This is best effort: when
// materialization fails the error is dropped and the job is simply
// left without a Configuration.
if deps.Catalog != nil && len(rows) > 0 &&
(statementType == "" || statementType == statementTypeSelect) {
if dest, err := query.MaterializeImplicitDestination(
r.Context(), deps.Catalog, projectID, defaultDataset,
job.JobReference.JobID, restSchema, rows); err == nil {
job.Configuration = &jobs.JobConfiguration{
JobType: jobConfigurationKindQuery,
Query: &jobs.JobConfigurationQuery{
Query: req.Query,
DestinationTable: dest,
},
}
}
}
stampJobSessionInfo(job, sessionInfo)
// Surface the `emulatorRoute` debug field only to loopback
// callers so external BigQuery client libraries pointed at the
// emulator see the same JSON shape they would against the
// public REST surface. Non-loopback callers get an empty
// string, which `assembleQueryResponse` translates into "no
// emulatorRoute property" because the JSON struct tag is
// `omitempty`. See
// `docs/ENGINE_POLICY.md`.
visibleRoute := ""
visiblePhases := map[string]int64(nil)
if middleware.IsLoopback(r.Context()) {
visibleRoute = emulatorRoute
visiblePhases = emulatorPhases
}
out := assembleQueryResponse(
job, restSchema, rows, dmlStats, restDmlStats, statementType,
visibleRoute, visiblePhases, ddlTarget, sessionInfo)
writeJSON(w, http.StatusOK, out)
}
// positionalParameterMapKeyPrefix is the prefix of the proto map key
// synthesized for a positional query parameter (REST entries with an
// empty `name`); parametersToEngineMap appends the zero-based
// positional index to it (`__pos_0`, `__pos_1`, ...). The key must
// not collide with legitimate named parameters such as `@p0`.
const positionalParameterMapKeyPrefix = "__pos_"
// parametersToEngineMap converts the REST `queryParameters` list into
// the name-keyed `map<string, QueryParameter>` the engine request
// carries.
//
// Each REST entry supplies `name`, `parameterType` and
// `parameterValue`; the engine entry carries the type as a
// kind/JSON pair (bqtypes.ParameterTypeWire) and the value as a JSON
// string (bqtypes.ParameterValueWire, left empty when the REST value
// is absent).
//
// Named parameters keep their name as the map key. Positional
// parameters (empty name) get the synthetic key
// positionalParameterMapKeyPrefix + index, i.e. `__pos_0`,
// `__pos_1`, ..., numbered in order of appearance.
//
// Entries without a `parameterType` are skipped -- they do not consume
// a positional index either. A nil or empty input yields a nil map.
func parametersToEngineMap(in []bqtypes.QueryParameter) map[string]*enginepb.QueryParameter {
	if len(in) == 0 {
		return nil
	}
	engineParams := make(map[string]*enginepb.QueryParameter, len(in))
	nextPositional := 0
	for _, param := range in {
		if param.ParameterType == nil {
			continue
		}
		key := param.Name
		if key == "" {
			key = positionalParameterMapKeyPrefix + strconv.Itoa(nextPositional)
			nextPositional++
		}
		entry := &enginepb.QueryParameter{}
		entry.TypeKind, entry.TypeJson = bqtypes.ParameterTypeWire(param.ParameterType)
		if param.ParameterValue != nil {
			entry.ValueJson = bqtypes.ParameterValueWire(param.ParameterType, param.ParameterValue)
		}
		engineParams[key] = entry
	}
	return engineParams
}
// streamQueryResults drains the engine's query stream and returns, in
// order: the schema, the DML stats, the collected rows, the statement
// type, the emulator route, the per-phase timings (microseconds keyed
// by phase name), and an ok flag.
//
// ok=false means a stream error was already written to w as an HTTP
// error envelope; every other result is zero and the caller must stop
// processing the request.
//
// Each received message is classified by the first populated field, in
// this priority order:
//
//  1. schema          - first one wins, later resends are ignored
//  2. dml_stats       - first one wins
//  3. statement_type  - first non-empty value wins
//  4. emulator_route  - first non-empty value wins
//  5. phase_timings   - merged into one map; unnamed phases are dropped
//  6. anything else   - treated as a row of cells and appended
//
// The row slice is always non-nil, even when no rows arrived.
func streamQueryResults(w http.ResponseWriter, stream enginepb.Query_ExecuteQueryClient) (
	*enginepb.TableSchema, *enginepb.DmlStats, []bqtypes.Row, string, string, map[string]int64, bool,
) {
	var (
		schema         *enginepb.TableSchema
		dmlStats       *enginepb.DmlStats
		statementType  string
		emulatorRoute  string
		emulatorPhases map[string]int64
	)
	rows := []bqtypes.Row{}
	for {
		msg, err := stream.Recv()
		if errors.Is(err, io.EOF) {
			return schema, dmlStats, rows, statementType, emulatorRoute, emulatorPhases, true
		}
		if queryGRPCToHTTPError(w, err) {
			return nil, nil, nil, "", "", nil, false
		}

		msgSchema, msgDml, msgPhases := msg.GetSchema(), msg.GetDmlStats(), msg.GetPhaseTimings()
		msgType, msgRoute := msg.GetStatementType(), msg.GetEmulatorRoute()

		switch {
		case msgSchema != nil:
			// Pin the first schema so a resend cannot reset it mid-stream.
			if schema == nil {
				schema = msgSchema
			}
		case msgDml != nil:
			// Summary message of a DML statement; only the first counts.
			if dmlStats == nil {
				dmlStats = msgDml
			}
		case msgType != "":
			if statementType == "" {
				statementType = msgType
			}
		case msgRoute != "":
			// Collected unconditionally; loopback gating is the
			// caller's responsibility.
			if emulatorRoute == "" {
				emulatorRoute = msgRoute
			}
		case msgPhases != nil && len(msgPhases.GetPhases()) > 0:
			if emulatorPhases == nil {
				emulatorPhases = make(map[string]int64, len(msgPhases.GetPhases()))
			}
			for _, phase := range msgPhases.GetPhases() {
				if phase.GetName() != "" {
					emulatorPhases[phase.GetName()] = phase.GetDurationUs()
				}
			}
		default:
			rows = append(rows, bqtypes.CellsToRowForSchema(msg.GetCells(), schema))
		}
	}
}
// dmlStatsFromProto maps the engine's DmlStats message onto the REST
// envelope, rendering each row count as a base-10 string. A nil input
// (the engine emitted no DML summary) yields nil.
func dmlStatsFromProto(d *enginepb.DmlStats) *bqtypes.DmlStats {
	if d == nil {
		return nil
	}
	decimal := func(n int64) string { return strconv.FormatInt(n, 10) }
	stats := &bqtypes.DmlStats{}
	stats.InsertedRowCount = decimal(d.GetInsertedRowCount())
	stats.UpdatedRowCount = decimal(d.GetUpdatedRowCount())
	stats.DeletedRowCount = decimal(d.GetDeletedRowCount())
	return stats
}
// assembleQueryResponse builds the synchronous jobs.query response
// envelope for a completed job.
//
// The base shape is the SELECT one: schema, rows, totalRows
// (len(rows)), the job reference and location, and the byte/time
// statistics copied from job.Statistics, with jobComplete=true.
//
// A `statistics` object is attached only when there is something to
// put in it: session info, or any of statementType / emulatorRoute /
// emulatorPhases / ddlTargetRoutine (the last four live under
// `statistics.query`). The caller is expected to have already blanked
// emulatorRoute and emulatorPhases for non-loopback requests.
//
// When restDmlStats is non-nil the DML envelope is added: `dmlStats`
// plus the aggregate `numDmlAffectedRows` (inserted + updated +
// deleted, read from the proto dmlStats). Plain DML has no rows, so
// schema and rows are cleared and totalRows becomes "0"; DML that did
// return rows (`THEN RETURN`) keeps them.
func assembleQueryResponse(job *jobs.Job, restSchema *bqtypes.TableSchema, rows []bqtypes.Row,
	dmlStats *enginepb.DmlStats, restDmlStats *bqtypes.DmlStats,
	statementType string,
	emulatorRoute string,
	emulatorPhases map[string]int64,
	ddlTargetRoutine *bqtypes.RoutineReference,
	sessionInfo *bqtypes.SessionInfo,
) bqtypes.QueryResponse {
	ref := job.JobReference
	resp := bqtypes.QueryResponse{
		Kind:                queryResponseKind,
		Schema:              restSchema,
		JobReference:        &ref,
		JobComplete:         true,
		TotalRows:           strconv.FormatUint(uint64(len(rows)), 10),
		Rows:                rows,
		TotalBytesProcessed: job.Statistics.TotalBytesProcessed,
		CreationTime:        job.Statistics.CreationTime,
		StartTime:           job.Statistics.StartTime,
		EndTime:             job.Statistics.EndTime,
		Location:            ref.Location,
	}
	if sessionInfo != nil {
		resp.SessionInfo = sessionInfo
	}

	hasQueryStats := statementType != "" || emulatorRoute != "" ||
		len(emulatorPhases) > 0 || ddlTargetRoutine != nil
	if hasQueryStats || sessionInfo != nil {
		stats := &bqtypes.JobStatistics{SessionInfo: sessionInfo}
		if hasQueryStats {
			stats.Query = &bqtypes.JobStatistics2{
				StatementType:    statementType,
				EmulatorRoute:    emulatorRoute,
				EmulatorPhases:   emulatorPhases,
				DdlTargetRoutine: ddlTargetRoutine,
			}
		}
		resp.Statistics = stats
	}

	if restDmlStats == nil {
		return resp
	}

	// `numDmlAffectedRows` is the aggregate older client libraries
	// still read; `dmlStats` carries the per-operation breakdown.
	affected := dmlStats.GetInsertedRowCount() +
		dmlStats.GetUpdatedRowCount() +
		dmlStats.GetDeletedRowCount()
	resp.DmlStats = restDmlStats
	resp.NumDmlAffectedRows = strconv.FormatInt(affected, 10)
	if len(rows) == 0 {
		resp.Schema = nil
		resp.Rows = nil
		resp.TotalRows = "0"
	}
	return resp
}
// getQueryResultsKind is the value the BigQuery REST API returns for
// the `kind` field of a GetQueryResultsResponse resource. See
// docs/bigquery/docs/reference/rest/v2/jobs/getQueryResults.md.
// assembleGetQueryResultsResponse stamps it on every response it builds.
const getQueryResultsKind = "bigquery#getQueryResultsResponse"
// QueryGetResults implements `bigquery.jobs.getQueryResults`:
//
//	GET /bigquery/v2/projects/{projectId}/queries/{jobId}
//
// It replays the rows + schema cached on a previously completed job;
// the response body is built by assembleGetQueryResultsResponse.
//
// Query parameters:
//
//   - `startIndex`, `maxResults`, `pageToken`: paging over the cached
//     rows, handled by paginateResults (which also mints the next
//     `pageToken` when rows remain).
//   - `location`: when both it and the stored job's location are
//     non-empty and differ, the handler answers 404 notFound.
//   - `timeoutMs`, `formatOptions`: not read by this handler.
//
// A job that is unknown, or that belongs to a different project than
// the one in the URL path, is reported as 404 notFound.
//
// When deps.Jobs is nil a fresh jobs.Registry is installed at handler
// construction time.
func QueryGetResults(deps Dependencies) http.HandlerFunc {
	if deps.Jobs == nil {
		deps.Jobs = jobs.NewRegistry()
	}
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")

		job, found := deps.Jobs.Get(jobID)
		if !found || job.JobReference.ProjectID != projectID {
			writeError(w, http.StatusNotFound, "notFound",
				"Not found: Job "+projectID+":"+jobID)
			return
		}

		// A location mismatch only counts when both sides name one.
		wantLoc := r.URL.Query().Get("location")
		if wantLoc != "" {
			haveLoc := job.JobReference.Location
			if haveLoc != "" && haveLoc != wantLoc {
				writeError(w, http.StatusNotFound, "notFound",
					"Not found: Job "+projectID+":"+jobID+
						" in location "+wantLoc)
				return
			}
		}

		writeJSON(w, http.StatusOK, assembleGetQueryResultsResponse(r, job))
	}
}
// assembleGetQueryResultsResponse builds the JSON envelope that
// QueryGetResults returns for a stored job.
//
// The cached result is unpacked with queryResultFields and paged with
// paginateResults using the request's query string. `totalRows` always
// reflects the full cached row count, not the page size. Statistics
// come from getQueryResultsStatistics (which applies the loopback
// gating), session info is copied from the job when present, and a
// cached DML summary is applied last via
// applyDmlStatsToGetQueryResults.
func assembleGetQueryResultsResponse(r *http.Request, job *jobs.Job) bqtypes.QueryResponse {
	schema, cachedRows, dml, stmtType, route, phases, routine := queryResultFields(job)
	page, nextToken := paginateResults(cachedRows, r.URL.Query())

	ref := job.JobReference
	session := job.Statistics.SessionInfo

	resp := bqtypes.QueryResponse{
		Kind:                getQueryResultsKind,
		Schema:              schema,
		JobReference:        &ref,
		JobComplete:         true,
		TotalRows:           strconv.FormatUint(uint64(len(cachedRows)), 10),
		Rows:                page,
		PageToken:           nextToken,
		TotalBytesProcessed: job.Statistics.TotalBytesProcessed,
		Location:            ref.Location,
	}
	resp.Statistics = getQueryResultsStatistics(r, stmtType, route, phases, routine, session)
	if session != nil {
		resp.SessionInfo = session
	}
	if dml != nil {
		applyDmlStatsToGetQueryResults(&resp, dml)
	}
	return resp
}
package handlers
import (
"net/http"
"net/url"
"strconv"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/middleware"
)
// queryResultFields unpacks the cached result stored on a job into its
// individual fields. A job without a cached result (job.Result == nil)
// yields the zero value for every field.
func queryResultFields(job *jobs.Job) (
	schema *bqtypes.TableSchema,
	allRows []bqtypes.Row,
	dmlStats *bqtypes.DmlStats,
	statementType string,
	emulatorRoute string,
	emulatorPhases map[string]int64,
	ddlTargetRoutine *bqtypes.RoutineReference,
) {
	cached := job.Result
	if cached == nil {
		return nil, nil, nil, "", "", nil, nil
	}
	return cached.Schema,
		cached.Rows,
		cached.DmlStats,
		cached.StatementType,
		cached.EmulatorRoute,
		cached.EmulatorPhases,
		cached.DdlTargetRoutine
}
// getQueryResultsStatistics builds the `statistics` object for a
// getQueryResults response, or returns nil when there is nothing to
// report.
//
// emulatorRoute and emulatorPhases are debug fields: they are only
// surfaced when middleware.IsLoopback reports true for the request
// context, and are treated as absent otherwise. The `query`
// sub-object is attached only when at least one of statementType, the
// visible route, the visible phases or ddlTargetRoutine is set;
// sessionInfo alone produces a statistics object without it.
func getQueryResultsStatistics(
	r *http.Request,
	statementType string,
	emulatorRoute string,
	emulatorPhases map[string]int64,
	ddlTargetRoutine *bqtypes.RoutineReference,
	sessionInfo *bqtypes.SessionInfo,
) *bqtypes.JobStatistics {
	var (
		route  string
		phases map[string]int64
	)
	if middleware.IsLoopback(r.Context()) {
		route, phases = emulatorRoute, emulatorPhases
	}

	hasQueryStats := statementType != "" || route != "" ||
		len(phases) > 0 || ddlTargetRoutine != nil
	if !hasQueryStats && sessionInfo == nil {
		return nil
	}

	stats := &bqtypes.JobStatistics{SessionInfo: sessionInfo}
	if hasQueryStats {
		stats.Query = &bqtypes.JobStatistics2{
			StatementType:    statementType,
			EmulatorRoute:    route,
			EmulatorPhases:   phases,
			DdlTargetRoutine: ddlTargetRoutine,
		}
	}
	return stats
}
// applyDmlStatsToGetQueryResults adds the DML envelope to an
// already-assembled getQueryResults response: `dmlStats` itself plus
// the aggregate `numDmlAffectedRows` (inserted + updated + deleted).
//
// Plain DML has no result rows, so in that case schema and rows are
// cleared and totalRows is pinned to "0". DML that did return rows
// (`THEN RETURN`) keeps its schema, rows and totalRows, matching what
// the synchronous jobs.query response (assembleQueryResponse) emits
// for the same job. "Has rows" is judged from the response itself: a
// non-empty page, or a totalRows other than ""/"0" (the page can be
// empty while rows are cached, e.g. when paging past the end).
func applyDmlStatsToGetQueryResults(out *bqtypes.QueryResponse, dmlStats *bqtypes.DmlStats) {
	out.DmlStats = dmlStats

	// The counts are decimal strings; parse errors are deliberately
	// ignored and contribute whatever strconv.ParseInt returned
	// (0 for malformed input).
	inserted, _ := strconv.ParseInt(dmlStats.InsertedRowCount, 10, 64)
	updated, _ := strconv.ParseInt(dmlStats.UpdatedRowCount, 10, 64)
	deleted, _ := strconv.ParseInt(dmlStats.DeletedRowCount, 10, 64)
	out.NumDmlAffectedRows = strconv.FormatInt(inserted+updated+deleted, 10)

	hasCachedRows := len(out.Rows) > 0 ||
		(out.TotalRows != "" && out.TotalRows != "0")
	if hasCachedRows {
		return
	}
	out.Schema = nil
	out.Rows = nil
	out.TotalRows = "0"
}
// defaultQueryResultsPageSize mirrors BigQuery's documented default
// `maxResults` for jobs.getQueryResults when the caller omits it.
// paginateResults also falls back to it when `maxResults` is present
// but does not parse as an unsigned integer.
const defaultQueryResultsPageSize uint64 = 10000
// paginateResults slices cached query rows using startIndex,
// maxResults, and pageToken, and returns the page together with the
// token for the next page ("" when no rows remain).
//
//   - `startIndex`: zero-based first row; unparsable values count as 0.
//   - `pageToken`: when set, a decimal string encoding the next start
//     row index (matching tabledata.list); it overrides startIndex. An
//     unparsable token yields an empty page and no token.
//   - `maxResults`: page size; missing or unparsable values fall back
//     to defaultQueryResultsPageSize. maxResults=0 means "wait for
//     completion, return zero rows" (browseTable sample), so no token
//     is minted in that case -- otherwise polling clients never stop.
//
// A start index at or beyond the end yields an empty page. The end of
// the page is computed without adding start and limit, because both
// are caller-controlled uint64 values whose sum can wrap around and
// produce an end below start (which would panic the slice expression).
func paginateResults(allRows []bqtypes.Row, q url.Values) ([]bqtypes.Row, string) {
	total := uint64(len(allRows))

	start := parseUintQuery(q, "startIndex", 0)
	if tok := q.Get("pageToken"); tok != "" {
		off, err := strconv.ParseUint(tok, 10, 64)
		if err != nil {
			return nil, ""
		}
		start = off
	}

	limit := parseUintQuery(q, "maxResults", defaultQueryResultsPageSize)
	if limit == 0 || start >= total {
		return nil, ""
	}

	// start < total here, so total-start cannot underflow, and
	// start+limit is only evaluated when it is known to be < total.
	end := total
	if limit < total-start {
		end = start + limit
	}

	var nextToken string
	if end < total {
		nextToken = strconv.FormatUint(end, 10)
	}
	return allRows[start:end], nextToken
}
// parseUintQuery reads query parameter key from q as a base-10 uint64.
// defaultVal is returned when the parameter is absent, empty, or does not
// parse (which includes negative numbers and values that overflow uint64).
func parseUintQuery(q url.Values, key string, defaultVal uint64) uint64 {
	raw := q.Get(key)
	if raw == "" {
		return defaultVal
	}
	if parsed, err := strconv.ParseUint(raw, 10, 64); err == nil {
		return parsed
	}
	return defaultVal
}
package handlers
import (
"regexp"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
)
// createTempTableDestinationRE matches the `CREATE TEMP TABLE `_SESSION`.table`
// and `CREATE TEMP TABLE `_SESSION`.`table`` shapes bigframes emits.
// Matching is case-insensitive. Capture group 1 is the backtick-quoted
// dataset, group 2 the backtick-quoted table, and group 3 the unquoted table
// name (which ends at the first whitespace or opening parenthesis).
var createTempTableDestinationRE = regexp.MustCompile(
"(?i)CREATE\\s+TEMP\\s+TABLE\\s+`([^`]+)`\\.(?:`([^`]+)`|([^\\s(]+))")
// stampQueryJobDestination records the table a CREATE TABLE-style query job
// writes to in configuration.query.destinationTable, so BigQuery clients
// (e.g. bigframes SessionResourceManager.create_temp_table) can read
// query_job.destination after jobs.insert.
//
// Only the CREATE_TABLE and CREATE_TABLE_AS_SELECT statement types are
// considered. The job is left untouched when it has no query configuration
// or when no destination can be parsed out of the SQL text.
func stampQueryJobDestination(projectID string, job *jobs.Job, statementType string) {
	if job == nil || job.Configuration == nil || job.Configuration.Query == nil {
		return
	}
	if statementType != "CREATE_TABLE" && statementType != "CREATE_TABLE_AS_SELECT" {
		return
	}
	queryCfg := job.Configuration.Query
	if dest := parseCreateTableDestination(projectID, queryCfg.Query); dest != nil {
		queryCfg.DestinationTable = dest
	}
}
// parseCreateTableDestination extracts the destination table reference from
// SQL matched by createTempTableDestinationRE. The dataset comes from the
// first capture group; the table comes from the backtick-quoted group when it
// matched, otherwise from the unquoted group. It returns nil when the SQL
// does not match or either identifier is empty.
func parseCreateTableDestination(projectID, sql string) *bqtypes.TableReference {
	match := createTempTableDestinationRE.FindStringSubmatch(strings.TrimSpace(sql))
	if len(match) < 3 {
		return nil
	}
	dataset, table := match[1], match[2]
	if table == "" && len(match) > 3 {
		table = match[3]
	}
	if dataset == "" || table == "" {
		return nil
	}
	return &bqtypes.TableReference{
		ProjectID: projectID,
		DatasetID: dataset,
		TableID:   table,
	}
}
package handlers
import (
"fmt"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// expandQueryParamsInSQL runs the gateway-side SQL rewrites for query
// parameters the DuckDB transpiler cannot lower yet (ARRAY IN UNNEST): named
// array parameters are inlined first, then positional ones.
func expandQueryParamsInSQL(sql string, params []bqtypes.QueryParameter) string {
	withNamed := expandArrayParamsInSQL(sql, params)
	return expandPositionalArrayParamsInSQL(withNamed, params)
}
// expandArrayParamsInSQL rewrites `IN UNNEST(@name)` filters into `IN (...)`
// literal lists when the caller supplied a named ARRAY query parameter.
// The DuckDB transpiler does not yet lower IN UNNEST(array_param) shapes;
// expanding at the gateway preserves analyzer binding for scalar params while
// unblocking thirdparty array-parameter samples.
//
// Every element is emitted as a single-quoted string literal with embedded
// single quotes doubled. Elements whose Value is empty are skipped, and a
// parameter with no non-empty elements is left untouched. Both the `@name`
// and the backtick-quoted reference forms are rewritten, for IN and NOT IN.
func expandArrayParamsInSQL(sql string, params []bqtypes.QueryParameter) string {
	out := sql
	for _, p := range params {
		if p.Name == "" || p.ParameterType == nil {
			continue
		}
		// Trim before comparing so this check accepts exactly the same type
		// spellings as stripExpandedArrayParams and the positional expander.
		if strings.ToUpper(strings.TrimSpace(p.ParameterType.Type)) != sqlTypeARRAY {
			continue
		}
		if p.ParameterValue == nil || len(p.ParameterValue.ArrayValues) == 0 {
			continue
		}
		quoted := make([]string, 0, len(p.ParameterValue.ArrayValues))
		for _, av := range p.ParameterValue.ArrayValues {
			if av.Value == "" {
				continue
			}
			quoted = append(quoted, fmt.Sprintf("'%s'",
				strings.ReplaceAll(av.Value, "'", "''")))
		}
		if len(quoted) == 0 {
			continue
		}
		list := strings.Join(quoted, ", ")
		for _, ref := range []string{"@" + p.Name, "`" + p.Name + "`"} {
			// NOT IN is rewritten before IN so both forms are handled
			// explicitly, as in the original sequence of replacements.
			out = strings.ReplaceAll(out, "NOT IN UNNEST("+ref+")", "NOT IN ("+list+")")
			out = strings.ReplaceAll(out, "IN UNNEST("+ref+")", "IN ("+list+")")
		}
	}
	return out
}
// stripExpandedArrayParams removes ARRAY parameters that expandQueryParamsInSQL
// inlined via IN/NOT IN UNNEST so the engine is not asked to bind them.
//
// Non-ARRAY parameters are always kept. A named ARRAY parameter is dropped
// only when one of its IN UNNEST patterns is present in originalSQL and gone
// from expandedSQL. Positional ARRAY parameters are matched, in order, against
// successive `IN UNNEST(?)` occurrences in originalSQL, and dropped only when
// the positional expander would actually have inlined them.
func stripExpandedArrayParams(
	originalSQL, expandedSQL string,
	params []bqtypes.QueryParameter,
) []bqtypes.QueryParameter {
	if len(params) == 0 {
		return params
	}
	const positionalPattern = "IN UNNEST(?)"
	out := make([]bqtypes.QueryParameter, 0, len(params))
	remaining := originalSQL
	for _, p := range params {
		if p.ParameterType == nil ||
			strings.ToUpper(strings.TrimSpace(p.ParameterType.Type)) != sqlTypeARRAY {
			out = append(out, p)
			continue
		}
		if p.Name != "" {
			if !namedArrayParamWasExpanded(originalSQL, expandedSQL, p.Name) {
				out = append(out, p)
			}
			continue
		}
		// The positional expander skips arrays with no non-empty element and
		// leaves their placeholder in the SQL. Such a parameter must stay
		// bound, otherwise the remaining ? placeholders lose their values.
		inlinable := false
		if p.ParameterValue != nil {
			for _, av := range p.ParameterValue.ArrayValues {
				if av.Value != "" {
					inlinable = true
					break
				}
			}
		}
		if !inlinable || !strings.Contains(remaining, positionalPattern) {
			out = append(out, p)
			continue
		}
		// Consume one placeholder so the next positional array is matched
		// against the next occurrence.
		remaining = strings.Replace(remaining, positionalPattern, "IN (__expanded__)", 1)
	}
	return out
}
// namedArrayParamWasExpanded reports whether an IN UNNEST / NOT IN UNNEST
// reference to the named parameter (written as @name or as a backtick-quoted
// name) occurs in originalSQL but no longer occurs in expandedSQL.
func namedArrayParamWasExpanded(originalSQL, expandedSQL, name string) bool {
	references := [...]string{"@" + name, "`" + name + "`"}
	operators := [...]string{"IN", "NOT IN"}
	for _, ref := range references {
		for _, op := range operators {
			pattern := op + " UNNEST(" + ref + ")"
			if !strings.Contains(originalSQL, pattern) {
				continue
			}
			if !strings.Contains(expandedSQL, pattern) {
				return true
			}
		}
	}
	return false
}
// stripExpandedPositionalArrayParams expands sql with expandQueryParamsInSQL
// and then drops the ARRAY parameters that expansion inlined, so engine
// binding indices stay aligned with the remaining ? placeholders.
func stripExpandedPositionalArrayParams(sql string, params []bqtypes.QueryParameter) []bqtypes.QueryParameter {
	expanded := expandQueryParamsInSQL(sql, params)
	return stripExpandedArrayParams(sql, expanded, params)
}
// expandPositionalArrayParamsInSQL inlines unnamed ARRAY parameters into the
// SQL text. Walking params in order, each positional ARRAY parameter that has
// at least one non-empty element replaces the first remaining `IN UNNEST(?)`
// with an `IN ('v1', 'v2', ...)` list. Elements are single-quoted with
// embedded single quotes doubled; empty elements are skipped.
func expandPositionalArrayParamsInSQL(sql string, params []bqtypes.QueryParameter) string {
	const placeholder = "IN UNNEST(?)"
	rewritten := sql
	for _, p := range params {
		positionalArray := p.Name == "" && p.ParameterType != nil &&
			strings.ToUpper(strings.TrimSpace(p.ParameterType.Type)) == "ARRAY"
		if !positionalArray || !strings.Contains(rewritten, placeholder) {
			continue
		}
		if p.ParameterValue == nil || len(p.ParameterValue.ArrayValues) == 0 {
			continue
		}
		literals := make([]string, 0, len(p.ParameterValue.ArrayValues))
		for _, element := range p.ParameterValue.ArrayValues {
			if element.Value != "" {
				escaped := strings.ReplaceAll(element.Value, "'", "''")
				literals = append(literals, fmt.Sprintf("'%s'", escaped))
			}
		}
		if len(literals) == 0 {
			continue
		}
		inList := "IN (" + strings.Join(literals, ", ") + ")"
		rewritten = strings.Replace(rewritten, placeholder, inList, 1)
	}
	return rewritten
}
package handlers
import (
"encoding/json"
"io"
"net/http"
"strconv"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/routines"
)
// routineListKind is the `kind` field for a routines.list response. See
// docs/bigquery/docs/reference/rest/v2/routines/list.md.
// RoutineList writes it under the resourceKeyKind key of every response.
const routineListKind = "bigquery#listRoutinesResponse"
// Defaults applied by routineResource and RoutineInsert when an incoming
// routine leaves the corresponding field empty.
const (
// defaultRoutineType is the routineType used when the request omits one.
defaultRoutineType = "SCALAR_FUNCTION"
// defaultRoutineLanguage is the language used when the request omits one.
defaultRoutineLanguage = "SQL"
)
// routineStore returns deps.Routines, creating and assigning a fresh store on
// first use when the field is nil.
//
// NOTE(review): the HTTP handlers receive Dependencies by value and pass a
// pointer to their own copy, so a lazily created store is visible only through
// that copy; confirm deps.Routines is always populated by the caller.
func routineStore(deps *Dependencies) *routines.Store {
	if existing := deps.Routines; existing != nil {
		return existing
	}
	deps.Routines = routines.NewStore()
	return deps.Routines
}
func routineIDFromPath(r *http.Request) (projectID, datasetID, routineID string) {
return r.PathValue("projectId"), r.PathValue("datasetId"), r.PathValue("routineId")
}
// decodeRoutineBody reads the request body and unmarshals it as a routine.
// An empty body is accepted and yields the zero routine. When the body cannot
// be read or is not valid JSON, a 400 error is written to w and the second
// result is false; callers must not write another response in that case.
func decodeRoutineBody(w http.ResponseWriter, r *http.Request) (bqtypes.Routine, bool) {
	var routine bqtypes.Routine
	raw, readErr := io.ReadAll(r.Body)
	switch {
	case readErr != nil:
		writeError(w, http.StatusBadRequest, reasonInvalid,
			"Could not read routine request body: "+readErr.Error())
		return routine, false
	case len(raw) == 0:
		return routine, true
	}
	if parseErr := json.Unmarshal(raw, &routine); parseErr != nil {
		writeError(w, http.StatusBadRequest, reasonInvalid,
			"Could not parse routine request body as JSON: "+parseErr.Error())
		return routine, false
	}
	return routine, true
}
// routineResource normalizes rt into the resource the gateway stores and
// returns. The routine reference is always overwritten from the given IDs and
// LastModifiedTime is always refreshed. RoutineType, Language, CreationTime
// and Etag are filled in only when the caller left them empty.
func routineResource(projectID, datasetID, routineID string, rt bqtypes.Routine) bqtypes.Routine {
	rt.RoutineReference = bqtypes.RoutineReference{
		ProjectID: projectID,
		DatasetID: datasetID,
		RoutineID: routineID,
	}

	// Defaults for fields the request may omit.
	if rt.RoutineType == "" {
		rt.RoutineType = defaultRoutineType
	}
	if rt.Language == "" {
		rt.Language = defaultRoutineLanguage
	}

	// Timestamps: creation is sticky, modification always moves forward.
	if rt.CreationTime == "" {
		rt.CreationTime = nowMillis()
	}
	rt.LastModifiedTime = nowMillis()

	if rt.Etag == "" {
		rt.Etag = routines.MintEtag()
	}
	return rt
}
// routineListEntry projects a routine onto the subset of fields upstream list
// returns when readMask is unset: etag, reference, type, language and the two
// timestamps. Every other field is left at its zero value.
func routineListEntry(rt bqtypes.Routine) bqtypes.Routine {
	var entry bqtypes.Routine
	entry.Etag = rt.Etag
	entry.RoutineReference = rt.RoutineReference
	entry.RoutineType = rt.RoutineType
	entry.Language = rt.Language
	entry.CreationTime = rt.CreationTime
	entry.LastModifiedTime = rt.LastModifiedTime
	return entry
}
// RoutineList implements `bigquery.routines.list`:
//
//	GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines
//
// Routines come from the merged catalog and store view when the catalog is
// enabled, otherwise from the in-memory store alone; the `filter` query
// parameter is forwarded to whichever source is used.
//
// Paging: `pageToken` is the decimal offset of the first routine to return
// (the value this handler minted as `nextPageToken` on the previous page) and
// `maxResults` caps the page size. A `nextPageToken` is emitted only when
// routines remain after the page. A malformed or negative `pageToken` is
// rejected with 400. A non-positive or unparsable `maxResults` is ignored,
// because a zero-sized page could never advance the token.
//
// NOTE(review): offset paging assumes both sources list routines in a stable
// order between requests; the merged view is sorted by key, the ordering of
// the store's List is not visible here.
func RoutineList(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID := r.PathValue("projectId")
		datasetID := r.PathValue("datasetId")
		query := r.URL.Query()
		filter := query.Get("filter")

		var all []bqtypes.Routine
		if routineCatalogEnabled(&deps) {
			all = mergeRoutineSources(r.Context(), &deps, projectID, datasetID, filter)
		} else {
			all = routineStore(&deps).List(projectID, datasetID, filter)
		}

		// Honor the token minted below; ignoring it made every follow-up
		// request return the first page again.
		start := 0
		if tok := query.Get("pageToken"); tok != "" {
			offset, err := strconv.Atoi(tok)
			if err != nil || offset < 0 {
				writeError(w, http.StatusBadRequest, reasonInvalid,
					"Invalid pageToken: "+tok)
				return
			}
			// An offset past the end yields an empty final page.
			start = min(offset, len(all))
		}

		items := make([]bqtypes.Routine, 0, len(all)-start)
		for _, rt := range all[start:] {
			items = append(items, routineListEntry(rt))
		}
		resp := map[string]any{
			resourceKeyKind: routineListKind,
			"routines":      items,
		}
		if raw := query.Get("maxResults"); raw != "" {
			if n, err := strconv.Atoi(raw); err == nil && n > 0 && n < len(items) {
				resp["routines"] = items[:n]
				resp["nextPageToken"] = strconv.Itoa(start + n)
			}
		}
		writeJSON(w, http.StatusOK, resp)
	}
}
// RoutineGet implements `bigquery.routines.get`:
//
//	GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines/{routineId}
//
// It answers 200 with the routine found by routineLookupExisting, or 404 when
// that lookup finds nothing.
func RoutineGet(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, routineID := routineIDFromPath(r)
		if rt, found := routineLookupExisting(r.Context(), &deps, projectID, datasetID, routineID); found {
			writeJSON(w, http.StatusOK, rt)
			return
		}
		writeError(w, http.StatusNotFound, reasonNotFound,
			"Not found: Routine "+projectID+":"+datasetID+"."+routineID)
	}
}
// RoutineInsert implements `bigquery.routines.insert`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines
//
// The body must carry routineReference.routineId and definitionBody; a request
// missing either is answered with 400. With the catalog enabled the routine is
// created through catalogInsertRoutine, otherwise through the in-memory
// store's Insert; a duplicate is answered with 409. On success the routine is
// also upserted into the in-memory store and returned with 200.
func RoutineInsert(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID := r.PathValue("projectId")
		datasetID := r.PathValue("datasetId")
		rt, ok := decodeRoutineBody(w, r)
		if !ok {
			return
		}
		routineID := rt.RoutineReference.RoutineID
		switch {
		case routineID == "":
			writeError(w, http.StatusBadRequest, reasonInvalid,
				"Required routineReference.routineId is missing.")
			return
		case rt.DefinitionBody == "":
			writeError(w, http.StatusBadRequest, reasonInvalid,
				"Required definitionBody is missing.")
			return
		}
		if rt.RoutineType == "" {
			rt.RoutineType = defaultRoutineType
		}
		if rt.Language == "" {
			rt.Language = defaultRoutineLanguage
		}
		created := routineResource(projectID, datasetID, routineID, rt)
		if routineCatalogEnabled(&deps) {
			// catalogInsertRoutine reports true once it has already answered
			// the request (conflict or engine error).
			if responded := catalogInsertRoutine(r.Context(), w, &deps, projectID, datasetID, routineID, created); responded {
				return
			}
		} else if inserted := routineStore(&deps).Insert(created); !inserted {
			writeError(w, http.StatusConflict, reasonDuplicate,
				"Already Exists: Routine "+projectID+":"+datasetID+"."+routineID)
			return
		}
		routineStore(&deps).Upsert(created)
		writeJSON(w, http.StatusOK, created)
	}
}
// RoutineUpdate implements `bigquery.routines.update`:
//
//	PUT /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines/{routineId}
//
// The routine must already exist (404 otherwise). The request body replaces
// the stored routine; the original CreationTime is preserved and a new etag
// is minted on every update. With the catalog enabled the routine is upserted
// there first, then mirrored into the in-memory store.
func RoutineUpdate(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, routineID := routineIDFromPath(r)
		previous, found := routineLookupExisting(r.Context(), &deps, projectID, datasetID, routineID)
		if !found {
			writeError(w, http.StatusNotFound, reasonNotFound,
				"Not found: Routine "+projectID+":"+datasetID+"."+routineID)
			return
		}
		body, ok := decodeRoutineBody(w, r)
		if !ok {
			return
		}
		updated := routineResource(projectID, datasetID, routineID, body)
		updated.CreationTime = previous.CreationTime
		updated.Etag = routines.MintEtag()
		if routineCatalogEnabled(&deps) {
			if err := catalogUpsertRoutine(r.Context(), &deps, updated); err != nil {
				// The request ends here whatever grpcToHTTPError reports.
				// NOTE(review): if it can return false without writing a
				// response, the client sees an empty 200; confirm.
				grpcToHTTPError(w, err)
				return
			}
		}
		routineStore(&deps).Upsert(updated)
		writeJSON(w, http.StatusOK, updated)
	}
}
// RoutineDelete implements `bigquery.routines.delete`:
//
//	DELETE /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines/{routineId}
//
// With the catalog enabled the routine is deleted there first and the
// in-memory store entry is removed on a best-effort basis. Without a catalog,
// a routine missing from the store is answered with 404. Success is an empty
// JSON object with status 200.
func RoutineDelete(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, routineID := routineIDFromPath(r)
		if routineCatalogEnabled(&deps) {
			if err := catalogDeleteRoutine(r.Context(), &deps, projectID, datasetID, routineID); err != nil {
				// The request ends here whatever grpcToHTTPError reports.
				// NOTE(review): if it can return false without writing a
				// response, the client sees an empty 200; confirm.
				grpcToHTTPError(w, err)
				return
			}
		}
		// The store delete always runs; its result only matters when the
		// store is the sole source of truth.
		removed := routineStore(&deps).Delete(projectID, datasetID, routineID)
		if !removed && !routineCatalogEnabled(&deps) {
			writeError(w, http.StatusNotFound, reasonNotFound,
				"Not found: Routine "+projectID+":"+datasetID+"."+routineID)
			return
		}
		writeJSON(w, http.StatusOK, struct{}{})
	}
}
package handlers
import (
"context"
"net/http"
"slices"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/routines"
)
// routineCatalogEnabled reports whether deps carries a catalog client, i.e.
// whether deps.Catalog is non-nil. Routine handlers use it to choose between
// the catalog-backed path and the in-memory store alone.
func routineCatalogEnabled(deps *Dependencies) bool {
return deps.Catalog != nil
}
// routineRefProto builds the enginepb routine reference for the given
// project, dataset and routine IDs.
func routineRefProto(projectID, datasetID, routineID string) *enginepb.RoutineRef {
	ref := new(enginepb.RoutineRef)
	ref.ProjectId = projectID
	ref.DatasetId = datasetID
	ref.RoutineId = routineID
	return ref
}
// routineListKey renders a routine reference as "project:dataset.routine".
// mergeRoutineSources uses the result both to de-duplicate routines and as
// the sort key for the merged listing.
func routineListKey(ref bqtypes.RoutineReference) string {
return ref.ProjectID + ":" + ref.DatasetID + "." + ref.RoutineID
}
// routineTypeFromFilter extracts the routine type from a list filter of the
// form "routineType:<TYPE>". The value after the prefix is returned with
// surrounding whitespace trimmed; any filter that does not start with that
// exact prefix (including the empty filter) yields "".
func routineTypeFromFilter(filter string) string {
	value, matched := strings.CutPrefix(filter, "routineType:")
	if !matched {
		return ""
	}
	return strings.TrimSpace(value)
}
// overlayRoutineFromStore copies the gateway store's creation time, last
// modified time and etag onto a catalog-backed routine, because the engine
// catalog does not persist those fields. Only non-empty stored values are
// copied. When ok is false there is no stored counterpart and the catalog
// routine is returned unchanged.
func overlayRoutineFromStore(catalog bqtypes.Routine, stored bqtypes.Routine, ok bool) bqtypes.Routine {
	if ok {
		if stored.CreationTime != "" {
			catalog.CreationTime = stored.CreationTime
		}
		if stored.LastModifiedTime != "" {
			catalog.LastModifiedTime = stored.LastModifiedTime
		}
		if stored.Etag != "" {
			catalog.Etag = stored.Etag
		}
	}
	return catalog
}
// ensureRoutineTimestamps fills in missing timestamps in place: an empty
// CreationTime becomes the current time, and an empty LastModifiedTime
// becomes the (possibly just assigned) CreationTime.
func ensureRoutineTimestamps(rt *bqtypes.Routine) {
	created := rt.CreationTime
	if created == "" {
		created = nowMillis()
		rt.CreationTime = created
	}
	if rt.LastModifiedTime == "" {
		rt.LastModifiedTime = created
	}
}
// routineFromDescriptor converts an engine routine descriptor into the REST
// routine shape. Reference, type, language and definition body are copied
// from the descriptor. When the descriptor carries DDL that
// ParseCreateRoutineDDL accepts, the fields recovered from the DDL are
// layered on top via applyRoutineFromDDL. A nil descriptor yields the zero
// routine.
func routineFromDescriptor(desc *enginepb.RoutineDescriptor) bqtypes.Routine {
	if desc == nil {
		return bqtypes.Routine{}
	}
	ref := desc.GetRoutine()
	projectID, datasetID := ref.GetProjectId(), ref.GetDatasetId()
	rt := bqtypes.Routine{
		RoutineReference: bqtypes.RoutineReference{
			ProjectID: projectID,
			DatasetID: datasetID,
			RoutineID: ref.GetRoutineId(),
		},
		RoutineType:    bqtypes.RoutineType(desc.GetRoutineType()),
		Language:       bqtypes.RoutineLanguage(desc.GetLanguage()),
		DefinitionBody: desc.GetDefinitionBody(),
	}
	if ddl := desc.GetDdlSql(); ddl != "" {
		if parsed, ok := routines.ParseCreateRoutineDDL(projectID, datasetID, ddl); ok {
			applyRoutineFromDDL(&rt, parsed)
		}
	}
	return rt
}
// applyRoutineFromDDL overlays fields recovered from parsed DDL onto rt.
// Each field is copied only when the parsed routine actually carries a value
// for it, so whatever rt already held survives wherever the DDL is silent.
func applyRoutineFromDDL(rt *bqtypes.Routine, parsed bqtypes.Routine) {
	// Classification.
	if parsed.RoutineType != "" {
		rt.RoutineType = parsed.RoutineType
	}
	if parsed.Language != "" {
		rt.Language = parsed.Language
	}
	// Signature.
	if len(parsed.Arguments) > 0 {
		rt.Arguments = parsed.Arguments
	}
	if parsed.ReturnType != nil {
		rt.ReturnType = parsed.ReturnType
	}
	// Body and language-specific options.
	if parsed.DefinitionBody != "" {
		rt.DefinitionBody = parsed.DefinitionBody
	}
	if parsed.PythonOptions != nil {
		rt.PythonOptions = parsed.PythonOptions
	}
}
// catalogGetRoutine fetches one routine from the engine catalog and converts
// it to the REST shape. The boolean is false when the call fails or the
// response carries no routine; the error itself is not surfaced, so callers
// cannot tell "not found" apart from an engine failure.
func catalogGetRoutine(
	ctx context.Context,
	deps *Dependencies,
	projectID, datasetID, routineID string,
) (bqtypes.Routine, bool) {
	req := &enginepb.GetRoutineRequest{
		Routine: routineRefProto(projectID, datasetID, routineID),
	}
	resp, err := deps.Catalog.GetRoutine(ctx, req)
	if err == nil && resp != nil {
		if desc := resp.GetRoutine(); desc != nil {
			return routineFromDescriptor(desc), true
		}
	}
	return bqtypes.Routine{}, false
}
// catalogListRoutines lists the dataset's routines from the engine catalog,
// converted to the REST shape. It returns nil when the call fails or returns
// no response; the error itself is not surfaced.
func catalogListRoutines(ctx context.Context, deps *Dependencies, projectID, datasetID string) []bqtypes.Routine {
	req := &enginepb.ListRoutinesRequest{
		Dataset: &enginepb.DatasetRef{
			ProjectId: projectID,
			DatasetId: datasetID,
		},
	}
	resp, err := deps.Catalog.ListRoutines(ctx, req)
	if err != nil || resp == nil {
		return nil
	}
	descriptors := resp.GetRoutines()
	converted := make([]bqtypes.Routine, len(descriptors))
	for i, desc := range descriptors {
		converted[i] = routineFromDescriptor(desc)
	}
	return converted
}
// mergeRoutineSources returns the union of catalog and in-memory store
// routines for a dataset, so DDL-registered routines appear in list even if
// the engine list lags, and store-only routines remain visible.
//
// Catalog entries win on key collisions, after the store's timestamps and
// etag have been overlaid onto them. A "routineType:" filter is applied to
// both sources, missing timestamps are filled in, and the result is ordered
// by routineListKey. With the catalog disabled the store listing is returned
// as is.
func mergeRoutineSources(
	ctx context.Context,
	deps *Dependencies,
	projectID, datasetID, filter string,
) []bqtypes.Routine {
	store := routineStore(deps)
	stored := store.List(projectID, datasetID, filter)
	if !routineCatalogEnabled(deps) {
		return stored
	}
	wantType := routineTypeFromFilter(filter)
	cataloged := catalogListRoutines(ctx, deps, projectID, datasetID)

	capacity := len(cataloged) + len(stored)
	merged := make(map[string]bqtypes.Routine, capacity)
	keys := make([]string, 0, capacity)
	// include records rt unless it fails the type filter or its key was
	// already taken by an earlier entry.
	include := func(rt bqtypes.Routine) {
		if wantType != "" && string(rt.RoutineType) != wantType {
			return
		}
		key := routineListKey(rt.RoutineReference)
		if _, seen := merged[key]; seen {
			return
		}
		ensureRoutineTimestamps(&rt)
		merged[key] = rt
		keys = append(keys, key)
	}

	for _, rt := range cataloged {
		ref := rt.RoutineReference
		local, found := store.Get(ref.ProjectID, ref.DatasetID, ref.RoutineID)
		include(overlayRoutineFromStore(rt, local, found))
	}
	// Store-only routines; anything already contributed by the catalog is
	// rejected by include's duplicate check.
	for _, rt := range stored {
		include(rt)
	}

	slices.Sort(keys)
	out := make([]bqtypes.Routine, len(keys))
	for i, key := range keys {
		out[i] = merged[key]
	}
	return out
}
// routineLookupExisting resolves a routine for get/update. With the catalog
// enabled, a catalog hit is returned with the store's timestamps and etag
// overlaid. Otherwise, or when the catalog lookup misses, the in-memory store
// is consulted. Any routine returned has its timestamps filled in. The
// boolean reports whether a routine was found.
func routineLookupExisting(
	ctx context.Context,
	deps *Dependencies,
	projectID, datasetID, routineID string,
) (bqtypes.Routine, bool) {
	store := routineStore(deps)
	if routineCatalogEnabled(deps) {
		if fromCatalog, ok := catalogGetRoutine(ctx, deps, projectID, datasetID, routineID); ok {
			local, found := store.Get(projectID, datasetID, routineID)
			merged := overlayRoutineFromStore(fromCatalog, local, found)
			ensureRoutineTimestamps(&merged)
			return merged, true
		}
	}
	fallback, ok := store.Get(projectID, datasetID, routineID)
	if !ok {
		return fallback, false
	}
	ensureRoutineTimestamps(&fallback)
	return fallback, true
}
// catalogInsertRoutine creates a routine in the catalog. It returns true when
// it has already written the HTTP response: a 409 because the routine
// exists, or the error produced for a failed upsert. It returns false when
// the routine was stored and the caller should continue with the success
// path.
func catalogInsertRoutine(
	ctx context.Context,
	w http.ResponseWriter,
	deps *Dependencies,
	projectID, datasetID, routineID string,
	out bqtypes.Routine,
) bool {
	_, alreadyThere := catalogGetRoutine(ctx, deps, projectID, datasetID, routineID)
	if alreadyThere {
		writeError(w, http.StatusConflict, reasonDuplicate,
			"Already Exists: Routine "+projectID+":"+datasetID+"."+routineID)
		return true
	}
	err := catalogUpsertRoutine(ctx, deps, out)
	if err == nil {
		return false
	}
	grpcToHTTPError(w, err)
	return true
}
// catalogUpsertRoutine writes rt to the engine catalog as a routine
// descriptor carrying the reference, type, language, definition body and the
// DDL rebuilt by routines.BuildDDLFromRoutine. It returns the catalog call's
// error unchanged.
func catalogUpsertRoutine(ctx context.Context, deps *Dependencies, rt bqtypes.Routine) error {
	ref := rt.RoutineReference
	descriptor := &enginepb.RoutineDescriptor{
		Routine:        routineRefProto(ref.ProjectID, ref.DatasetID, ref.RoutineID),
		RoutineType:    string(rt.RoutineType),
		Language:       string(rt.Language),
		DefinitionBody: rt.DefinitionBody,
		DdlSql:         routines.BuildDDLFromRoutine(rt),
	}
	_, err := deps.Catalog.UpsertRoutine(ctx, &enginepb.UpsertRoutineRequest{Routine: descriptor})
	return err
}
// catalogDeleteRoutine asks the engine catalog to delete the referenced
// routine and returns the call's error unchanged.
func catalogDeleteRoutine(ctx context.Context, deps *Dependencies, projectID, datasetID, routineID string) error {
	req := &enginepb.DeleteRoutineRequest{
		Routine: routineRefProto(projectID, datasetID, routineID),
	}
	_, err := deps.Catalog.DeleteRoutine(ctx, req)
	return err
}
// persistRoutineFromDDL registers the routine described by a CREATE FUNCTION /
// PROCEDURE statement in the in-memory store and, when the catalog is
// enabled, mirrors the stored routine into it so RoutineGet sees the same
// metadata as RoutineInsert.
//
// It returns the reference produced by routines.RegisterFromDDL, which is nil
// when nothing was registered.
func persistRoutineFromDDL(
	ctx context.Context,
	deps *Dependencies,
	projectID, defaultDatasetID, sql string,
) *bqtypes.RoutineReference {
	store := routineStore(deps)
	ref := routines.RegisterFromDDL(store, projectID, defaultDatasetID, sql)
	if ref == nil || !routineCatalogEnabled(deps) {
		return ref
	}
	if rt, ok := store.Get(ref.ProjectID, ref.DatasetID, ref.RoutineID); ok {
		// Best-effort mirror: a catalog failure is discarded and the store
		// registration still stands.
		_ = catalogUpsertRoutine(ctx, deps, rt)
	}
	return ref
}
package handlers
import (
"context"
"encoding/json"
"net/http"
"strconv"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// rowAccessPolicyListKind is the `kind` field for a
// rowAccessPolicies.list response. See
// docs/bigquery/docs/reference/rest/v2/rowAccessPolicies/list.md.
const rowAccessPolicyListKind = "bigquery#listRowAccessPoliciesResponse"
// rowAccessPolicyKind is the `kind` field policyToWire stamps on every
// individual row access policy resource.
const rowAccessPolicyKind = "bigquery#rowAccessPolicy"
// rowAccessPolicyReference is the JSON shape of the rowAccessPolicyReference
// object: the project, dataset and table a policy belongs to, plus the
// policy's own ID.
type rowAccessPolicyReference struct {
ProjectID string `json:"projectId"`
DatasetID string `json:"datasetId"`
TableID string `json:"tableId"`
PolicyID string `json:"policyId"`
}
// rowAccessPolicyWire is the JSON representation of a row access policy used
// for both request bodies (decodeRowAccessPolicyBody) and responses
// (policyToWire). The reference and filterPredicate are always serialized;
// every other field is omitted from the output when empty.
type rowAccessPolicyWire struct {
Kind string `json:"kind,omitempty"`
Etag string `json:"etag,omitempty"`
RowAccessPolicyReference rowAccessPolicyReference `json:"rowAccessPolicyReference"`
FilterPredicate string `json:"filterPredicate"`
// CreationTime and LastModifiedTime are decimal strings; policyToWire
// derives them from the engine's millisecond timestamps.
CreationTime string `json:"creationTime,omitempty"`
LastModifiedTime string `json:"lastModifiedTime,omitempty"`
Grantees []string `json:"grantees,omitempty"`
}
func rowAccessPolicyPathValues(r *http.Request) (projectID, datasetID, tableID, policyID string) {
return r.PathValue("projectId"), r.PathValue("datasetId"),
r.PathValue("tableId"), r.PathValue("policyId")
}
// tableRef builds the enginepb table reference for the given project,
// dataset and table IDs.
func tableRef(projectID, datasetID, tableID string) *enginepb.TableRef {
	ref := new(enginepb.TableRef)
	ref.ProjectId = projectID
	ref.DatasetId = datasetID
	ref.TableId = tableID
	return ref
}
// msToTimestampString renders a positive millisecond timestamp as a decimal
// string. Zero and negative values yield the empty string.
func msToTimestampString(ms int64) string {
	if ms > 0 {
		return strconv.FormatInt(ms, 10)
	}
	return ""
}
// policyToWire converts an engine row access policy into its REST JSON
// shape. The policy ID doubles as the etag, timestamps are rendered through
// msToTimestampString, and the grantee list is copied so the result does not
// alias the proto's slice.
func policyToWire(p *enginepb.RowAccessPolicy) rowAccessPolicyWire {
	table := p.GetTable()
	policyID := p.GetPolicyId()

	var wire rowAccessPolicyWire
	wire.Kind = rowAccessPolicyKind
	wire.Etag = policyID
	wire.RowAccessPolicyReference = rowAccessPolicyReference{
		ProjectID: table.GetProjectId(),
		DatasetID: table.GetDatasetId(),
		TableID:   table.GetTableId(),
		PolicyID:  policyID,
	}
	wire.FilterPredicate = p.GetFilterPredicate()
	wire.CreationTime = msToTimestampString(p.GetCreationTimeMs())
	wire.LastModifiedTime = msToTimestampString(p.GetLastModifiedTimeMs())
	wire.Grantees = append([]string(nil), p.GetGrantees()...)
	return wire
}
// decodeRowAccessPolicyBody decodes the request body as a row access policy.
// On a decode error (which includes an empty body) it returns the zero value
// together with the error.
func decodeRowAccessPolicyBody(r *http.Request) (rowAccessPolicyWire, error) {
	var decoded rowAccessPolicyWire
	err := json.NewDecoder(r.Body).Decode(&decoded)
	if err != nil {
		return rowAccessPolicyWire{}, err
	}
	return decoded, nil
}
// RowAccessPolicyList implements `bigquery.rowAccessPolicies.list`.
//
// Without a catalog it answers with an empty policy list. Otherwise it lists
// the table's policies from the catalog and returns them in wire form;
// catalog errors are translated by writeGRPCError.
func RowAccessPolicyList(deps Dependencies) http.HandlerFunc {
	// respond writes the list envelope around the given policies value.
	respond := func(w http.ResponseWriter, policies any) {
		writeJSON(w, http.StatusOK, map[string]any{
			resourceKeyKind:     rowAccessPolicyListKind,
			"rowAccessPolicies": policies,
		})
	}
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID, _ := rowAccessPolicyPathValues(r)
		if deps.Catalog == nil {
			respond(w, []any{})
			return
		}
		req := &enginepb.ListRowAccessPoliciesRequest{
			Table: tableRef(projectID, datasetID, tableID),
		}
		resp, err := deps.Catalog.ListRowAccessPolicies(r.Context(), req)
		if err != nil {
			writeGRPCError(w, err)
			return
		}
		source := resp.GetPolicies()
		wire := make([]rowAccessPolicyWire, len(source))
		for i, p := range source {
			wire[i] = policyToWire(p)
		}
		respond(w, wire)
	}
}
// RowAccessPolicyInsert implements `bigquery.rowAccessPolicies.insert`.
//
// The policy ID is taken from the body's rowAccessPolicyReference, falling
// back to the `policyId` query parameter; a request with neither is answered
// with 400. The policy is upserted into the catalog with creation and
// modification times both set to now. Without a catalog the request is
// handed to NotImplemented.
func RowAccessPolicyInsert(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID, _ := rowAccessPolicyPathValues(r)
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}
		body, err := decodeRowAccessPolicyBody(r)
		if err != nil {
			writeError(w, http.StatusBadRequest, reasonInvalid, err.Error())
			return
		}
		policyID := body.RowAccessPolicyReference.PolicyID
		if policyID == "" {
			policyID = r.URL.Query().Get("policyId")
		}
		if policyID == "" {
			writeError(w, http.StatusBadRequest, reasonInvalid, "policyId is required")
			return
		}
		stamp := time.Now().UnixMilli()
		policy := &enginepb.RowAccessPolicy{
			Table:              tableRef(projectID, datasetID, tableID),
			PolicyId:           policyID,
			FilterPredicate:    body.FilterPredicate,
			Grantees:           body.Grantees,
			CreationTimeMs:     stamp,
			LastModifiedTimeMs: stamp,
		}
		resp, err := deps.Catalog.UpsertRowAccessPolicy(r.Context(),
			&enginepb.UpsertRowAccessPolicyRequest{Policy: policy})
		if err != nil {
			writeGRPCError(w, err)
			return
		}
		writeJSON(w, http.StatusOK, policyToWire(resp.GetPolicy()))
	}
}
// RowAccessPolicyGet implements `bigquery.rowAccessPolicies.get`.
//
// It lists the table's policies from the catalog and returns the first one
// whose ID equals the path's policyId. A missing catalog or an unknown policy
// is handed to NotFound.
func RowAccessPolicyGet(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID, policyID := rowAccessPolicyPathValues(r)
		if deps.Catalog == nil {
			NotFound(w, r)
			return
		}
		req := &enginepb.ListRowAccessPoliciesRequest{
			Table: tableRef(projectID, datasetID, tableID),
		}
		resp, err := deps.Catalog.ListRowAccessPolicies(r.Context(), req)
		if err != nil {
			writeGRPCError(w, err)
			return
		}
		for _, candidate := range resp.GetPolicies() {
			if candidate.GetPolicyId() != policyID {
				continue
			}
			writeJSON(w, http.StatusOK, policyToWire(candidate))
			return
		}
		NotFound(w, r)
	}
}
// RowAccessPolicyUpdate implements `bigquery.rowAccessPolicies.update`.
//
// The policy named by the path is upserted into the catalog with the body's
// filter predicate and grantees and a fresh last-modified time. Without a
// catalog the request is handed to NotImplemented.
//
// NOTE(review): CreationTimeMs is not sent on update; whether the engine
// keeps the previously stored value is not visible here.
func RowAccessPolicyUpdate(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID, policyID := rowAccessPolicyPathValues(r)
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}
		body, err := decodeRowAccessPolicyBody(r)
		if err != nil {
			writeError(w, http.StatusBadRequest, reasonInvalid, err.Error())
			return
		}
		policy := &enginepb.RowAccessPolicy{
			Table:              tableRef(projectID, datasetID, tableID),
			PolicyId:           policyID,
			FilterPredicate:    body.FilterPredicate,
			Grantees:           body.Grantees,
			LastModifiedTimeMs: time.Now().UnixMilli(),
		}
		resp, err := deps.Catalog.UpsertRowAccessPolicy(r.Context(),
			&enginepb.UpsertRowAccessPolicyRequest{Policy: policy})
		if err != nil {
			writeGRPCError(w, err)
			return
		}
		writeJSON(w, http.StatusOK, policyToWire(resp.GetPolicy()))
	}
}
// RowAccessPolicyDelete implements `bigquery.rowAccessPolicies.delete`.
//
// It deletes the policy named by the path from the catalog and answers with
// a bare 200 and no body. Catalog errors are translated by writeGRPCError;
// without a catalog the request is handed to NotImplemented.
func RowAccessPolicyDelete(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID, policyID := rowAccessPolicyPathValues(r)
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}
		req := &enginepb.DeleteRowAccessPolicyRequest{
			Table:    tableRef(projectID, datasetID, tableID),
			PolicyId: policyID,
		}
		if _, err := deps.Catalog.DeleteRowAccessPolicy(r.Context(), req); err != nil {
			writeGRPCError(w, err)
			return
		}
		w.WriteHeader(http.StatusOK)
	}
}
// RowAccessPolicyDispatch routes table-scoped rowAccessPolicies methods.
//
// Collection requests (no policyId path value): GET lists, POST inserts.
// Policy requests (policyId present): GET reads, PUT and PATCH update, DELETE
// deletes, POST goes to the IAM policy handler. Any other method is answered
// with 405.
func RowAccessPolicyDispatch(deps Dependencies) http.HandlerFunc {
	list := RowAccessPolicyList(deps)
	insert := RowAccessPolicyInsert(deps)
	get := RowAccessPolicyGet(deps)
	update := RowAccessPolicyUpdate(deps)
	del := RowAccessPolicyDelete(deps)
	iam := RowAccessPolicyIamPolicy(deps)
	methodNotAllowed := func(w http.ResponseWriter) {
		writeError(w, http.StatusMethodNotAllowed, reasonInvalid,
			"HTTP method not supported for this rowAccessPolicies endpoint")
	}
	return func(w http.ResponseWriter, r *http.Request) {
		if r.PathValue("policyId") == "" {
			switch r.Method {
			case http.MethodGet:
				list(w, r)
			case http.MethodPost:
				insert(w, r)
			default:
				methodNotAllowed(w)
			}
			return
		}
		switch r.Method {
		case http.MethodGet:
			get(w, r)
		case http.MethodPut, http.MethodPatch:
			update(w, r)
		case http.MethodDelete:
			del(w, r)
		case http.MethodPost:
			iam(w, r)
		default:
			methodNotAllowed(w)
		}
	}
}
// writeGRPCError translates an engine error into an HTTP error response.
// Errors that carry no gRPC status become 500 with the raw error text.
// NotFound maps to 404, InvalidArgument to 400, PermissionDenied to 403,
// Unimplemented is delegated to NotImplemented, and every other code becomes
// 500 with the status message.
//
// NOTE(review): the Unimplemented branch passes a nil *http.Request to
// NotImplemented; confirm that handler tolerates a nil request.
func writeGRPCError(w http.ResponseWriter, err error) {
	st, isStatus := status.FromError(err)
	if !isStatus {
		writeError(w, http.StatusInternalServerError, reasonInternalError, err.Error())
		return
	}
	httpStatus, reason := http.StatusInternalServerError, reasonInternalError
	switch st.Code() {
	case codes.Unimplemented:
		NotImplemented(w, nil)
		return
	case codes.NotFound:
		httpStatus, reason = http.StatusNotFound, reasonNotFound
	case codes.InvalidArgument:
		httpStatus, reason = http.StatusBadRequest, reasonInvalid
	case codes.PermissionDenied:
		httpStatus, reason = http.StatusForbidden, reasonAccessDenied
	}
	writeError(w, httpStatus, reason, st.Message())
}
// SyncColumnGovernanceFromSchema persists policy tags from a REST schema
// patch into the engine catalog for query-time masking.
//
// Each field is sent to the catalog with its explicit MaskKind, or with
// "SHA256" when it has policy tags but no mask kind; fields with neither are
// skipped. The call is best-effort: catalog errors are discarded. It does
// nothing when the catalog or the schema is nil.
func SyncColumnGovernanceFromSchema(
	ctx context.Context,
	deps Dependencies,
	projectID, datasetID, tableID string,
	schema *bqtypes.TableSchema,
) {
	if deps.Catalog == nil || schema == nil {
		return
	}
	for _, field := range schema.Fields {
		tagged := field.PolicyTags != nil && len(field.PolicyTags.Names) > 0
		maskKind := field.MaskKind
		if maskKind == "" && tagged {
			maskKind = "SHA256"
		}
		if maskKind == "" {
			continue
		}
		req := &enginepb.SetColumnGovernanceRequest{
			Table: tableRef(projectID, datasetID, tableID),
			Column: &enginepb.ColumnGovernance{
				ColumnName: field.Name,
				MaskKind:   maskKind,
			},
		}
		if field.PolicyTags != nil {
			// Copy so the request does not alias the schema's slice.
			req.Column.PolicyTags = append([]string(nil), field.PolicyTags.Names...)
		}
		_, _ = deps.Catalog.SetColumnGovernance(ctx, req)
	}
}
// RowAccessPolicyIamPolicy is the handler RowAccessPolicyDispatch uses for
// POST requests that carry a policyId. The dependencies are ignored and
// every request is handed to NotImplemented.
func RowAccessPolicyIamPolicy(_ Dependencies) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { NotImplemented(w, r) }
}
package handlers
import (
"context"
"fmt"
"net/http"
"regexp"
"strconv"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/middleware"
"github.com/vantaboard/bigquery-emulator/gateway/query"
)
// stripBlockComments removes every `/* ... */` block comment from sql and
// returns the remaining text. Nothing is inserted in place of a removed
// comment. An unterminated comment swallows the rest of the input.
//
// The scan is purely textual: a `/*` inside a string literal is treated as a
// comment opener too. That is acceptable for its use in script detection,
// where only the presence of keywords matters.
func stripBlockComments(sql string) string {
	var out strings.Builder
	out.Grow(len(sql))
	rest := sql
	for {
		open := strings.Index(rest, "/*")
		if open < 0 {
			out.WriteString(rest)
			break
		}
		out.WriteString(rest[:open])
		// Look for the terminator after the opener so "/*/" is not taken for
		// a complete comment.
		end := strings.Index(rest[open+2:], "*/")
		if end < 0 {
			// Unterminated: drop everything from the opener onwards rather
			// than leaking the comment's final byte into the output.
			break
		}
		rest = rest[open+2+end+2:]
	}
	return out.String()
}
// trimLeadingSQLComments drops every line whose first non-blank characters
// are `--` (anywhere in the text, not only at the top), joins the remaining
// lines with newlines, and trims surrounding whitespace from the result.
func trimLeadingSQLComments(sql string) string {
	lines := strings.Split(sql, "\n")
	kept := make([]string, 0, len(lines))
	for _, line := range lines {
		if !strings.HasPrefix(strings.TrimSpace(line), "--") {
			kept = append(kept, line)
		}
	}
	return strings.TrimSpace(strings.Join(kept, "\n"))
}
func sqlForScriptDetection(sql string) string {
return trimLeadingSQLComments(stripBlockComments(sql))
}
// setKeywordRE matches the standalone keyword SET (case-insensitive, bounded
// by word boundaries) anywhere in the text.
var setKeywordRE = regexp.MustCompile(`(?i)\bSET\b`)
// beginEndBlockRE matches text that is, as a whole, a single
// `BEGIN ... END` block with an optional trailing semicolon. Matching is
// case-insensitive and `.` spans newlines; capture group 1 is the block body.
var beginEndBlockRE = regexp.MustCompile(`(?is)^\s*BEGIN\s+(.*)\s+END\s*;?\s*$`)
// unwrapBeginEndBlock returns the trimmed body of sql when sql as a whole is
// a single BEGIN ... END block (per beginEndBlockRE). Otherwise it returns
// sql exactly as given, without trimming.
func unwrapBeginEndBlock(sql string) string {
	match := beginEndBlockRE.FindStringSubmatch(strings.TrimSpace(sql))
	if len(match) != 2 {
		return sql
	}
	return strings.TrimSpace(match[1])
}
// isMultiStatementScript reports whether sql is a script the gateway handles
// as multiple statements. After comments are stripped for detection:
//
//   - SQL starting with CREATE, DROP or ALTER is never a script, because DDL
//     bodies such as CREATE PROCEDURE embed BEGIN/SET themselves;
//   - otherwise a single outer BEGIN ... END wrapper is removed, and the text
//     is a script when it contains "DECLARE " or "CALL ", or when it has at
//     least two semicolons and the keyword SET.
func isMultiStatementScript(sql string) bool {
	trimmed := strings.TrimSpace(sql)
	head := strings.ToUpper(sqlForScriptDetection(trimmed))
	// DDL setup statements must not enter the script splitter.
	for _, ddlPrefix := range [...]string{"CREATE ", "DROP ", "ALTER "} {
		if strings.HasPrefix(head, ddlPrefix) {
			return false
		}
	}
	body := sqlForScriptDetection(unwrapBeginEndBlock(trimmed))
	upperBody := strings.ToUpper(body)
	switch {
	case strings.Contains(upperBody, "DECLARE "):
		return true
	case strings.Contains(upperBody, "CALL "):
		return true
	case strings.Count(body, ";") < 2:
		return false
	default:
		return setKeywordRE.MatchString(upperBody)
	}
}
// needsEngineScriptExecution reports whether the script must run as one
// engine round-trip so DECLARE/CALL variable scope survives (the engine's
// ExecuteMultiStmtScript path). Legacy SET+UNNEST substitution scripts
// without DECLARE/CALL stay on the per-statement split path.
//
// The check is textual: after dropping "--" comment lines, the upper-cased
// script either starts with "BEGIN" or contains "DECLARE " or "CALL ".
func needsEngineScriptExecution(sql string) bool {
	upper := strings.ToUpper(trimLeadingSQLComments(strings.TrimSpace(sql)))
	switch {
	case strings.HasPrefix(upper, "BEGIN"):
		return true
	case strings.Contains(upper, "DECLARE "):
		return true
	default:
		return strings.Contains(upper, "CALL ")
	}
}
// splitScriptStatements splits script SQL into statements on semicolons that
// appear outside string literals, quoted identifiers and comments.
//
// Single-quoted, double-quoted and backtick-quoted spans are recognized, and
// a backslash inside a quoted span escapes the following byte so `\'` does
// not end the literal. "--" line comments and /* ... */ block comments are
// copied through verbatim, so quotes or semicolons inside them neither split
// a statement nor disturb quote tracking. Each returned statement is
// whitespace-trimmed; empty and comment-only statements are omitted.
func splitScriptStatements(sql string) []string {
	var out []string
	var b strings.Builder

	// flush emits the statement accumulated so far, if it has any content.
	flush := func() {
		stmt := strings.TrimSpace(b.String())
		if stmt != "" && !isCommentOnlyStatement(stmt) {
			out = append(out, stmt)
		}
		b.Reset()
	}

	var quote byte // active quote character, or 0 when outside a quoted span
	for i := 0; i < len(sql); i++ {
		c := sql[i]
		switch {
		case quote != 0:
			b.WriteByte(c)
			switch {
			case c == '\\' && i+1 < len(sql):
				// Escaped byte: copy it without interpreting it.
				i++
				b.WriteByte(sql[i])
			case c == quote:
				quote = 0
			}
		case c == '\'' || c == '"' || c == '`':
			quote = c
			b.WriteByte(c)
		case strings.HasPrefix(sql[i:], "--"):
			// Line comment: copy up to (not including) the newline.
			n := strings.IndexByte(sql[i:], '\n')
			if n < 0 {
				n = len(sql) - i
			}
			b.WriteString(sql[i : i+n])
			i += n - 1
		case strings.HasPrefix(sql[i:], "/*"):
			// Block comment: copy through the terminator, or to the end of
			// the input when it is unterminated.
			stop := len(sql)
			if n := strings.Index(sql[i+2:], "*/"); n >= 0 {
				stop = i + 2 + n + 2
			}
			b.WriteString(sql[i:stop])
			i = stop - 1
		case c == ';':
			flush()
		default:
			b.WriteByte(c)
		}
	}
	flush()
	return out
}
// isCommentOnlyStatement reports whether every non-blank line of s starts
// with "--" (after trimming whitespace). An empty or all-blank s counts as
// comment-only.
func isCommentOnlyStatement(s string) bool {
	for _, ln := range strings.Split(s, "\n") {
		trimmed := strings.TrimSpace(ln)
		if trimmed != "" && !strings.HasPrefix(trimmed, "--") {
			return false
		}
	}
	return true
}
// scriptStmtKind classifies one statement of a split script.
type scriptStmtKind int
const (
// scriptStmtDeclare is a statement that starts with "DECLARE ".
scriptStmtDeclare scriptStmtKind = iota
// scriptStmtSet is a statement that starts with "SET ".
scriptStmtSet
// scriptStmtCall is a statement that starts with "CALL ".
scriptStmtCall
// scriptStmtQuery is any other statement.
scriptStmtQuery
)
// scriptStatement is one classified script statement, as produced by
// classifyScriptStatement.
type scriptStatement struct {
// kind is the statement category.
kind scriptStmtKind
// sql is the statement text with "--" comment lines removed; for SET
// statements it is only the expression to the right of "=".
sql string
// name is the variable name for DECLARE and SET statements; empty otherwise.
name string
}
// classifyScriptStatement removes "--" comment lines from sql and classifies
// it by its leading keyword (case-insensitive):
//
//   - DECLARE: name is the first space/tab-delimited token after the keyword
//     and sql is the whole statement.
//   - SET: name and sql are the two results of parseSetStatement.
//   - CALL and everything else: sql is the whole statement, name is empty.
func classifyScriptStatement(sql string) scriptStatement {
	trim := trimLeadingSQLComments(sql)
	upper := strings.ToUpper(trim)

	if strings.HasPrefix(upper, "DECLARE ") {
		name := strings.TrimSpace(trim[len("DECLARE "):])
		if end := strings.IndexAny(name, " \t"); end > 0 {
			name = name[:end]
		}
		return scriptStatement{kind: scriptStmtDeclare, sql: trim, name: name}
	}
	if strings.HasPrefix(upper, "SET ") {
		target, expr := parseSetStatement(trim)
		return scriptStatement{kind: scriptStmtSet, sql: expr, name: target}
	}
	if strings.HasPrefix(upper, "CALL ") {
		return scriptStatement{kind: scriptStmtCall, sql: trim}
	}
	return scriptStatement{kind: scriptStmtQuery, sql: trim}
}
// parseSetStatement splits a "SET name = expr" statement at its first "=".
// It returns the trimmed variable name and the trimmed expression with one
// trailing semicolon removed. When there is no "=", it returns an empty name
// and the original sql.
//
// The first four bytes of sql are skipped unconditionally, so the caller
// must already have verified that sql starts with "SET ".
func parseSetStatement(sql string) (name, body string) {
	assignment := strings.TrimSpace(sql[len("SET "):])
	eq := strings.Index(assignment, "=")
	if eq < 0 {
		return "", sql
	}
	name = strings.TrimSpace(assignment[:eq])
	body = strings.TrimSuffix(strings.TrimSpace(assignment[eq+1:]), ";")
	return name, body
}
// substituteScriptVars replaces every UNNEST(name) and UNNEST(`name`)
// occurrence in sql with an inline array literal of the variable's string
// values. Variables with no values are left untouched.
//
// Values are rendered as single-quoted GoogleSQL string literals using
// backslash escapes (\\, \', \n, \r). GoogleSQL has no doubled-quote ('')
// escape, so a value containing a quote or backslash must be escaped this
// way to yield a valid literal.
func substituteScriptVars(sql string, vars map[string][]string) string {
	escaper := strings.NewReplacer(`\`, `\\`, `'`, `\'`, "\n", `\n`, "\r", `\r`)
	out := sql
	for name, vals := range vars {
		if len(vals) == 0 {
			continue
		}
		quoted := make([]string, len(vals))
		for i, s := range vals {
			quoted[i] = "'" + escaper.Replace(s) + "'"
		}
		replacement := "UNNEST([" + strings.Join(quoted, ", ") + "])"
		out = strings.ReplaceAll(out, "UNNEST("+name+")", replacement)
		out = strings.ReplaceAll(out, "UNNEST(`"+name+"`)", replacement)
	}
	return out
}
// arrayFromRow extracts the string elements of a single-row, single-column
// result whose only cell holds a []bqtypes.Cell. It returns nil for any other
// result shape. Elements whose value is not a string are skipped.
func arrayFromRow(rows []bqtypes.Row) []string {
	if len(rows) != 1 {
		return nil
	}
	cells := rows[0].F
	if len(cells) != 1 {
		return nil
	}
	elems, isArray := cells[0].V.([]bqtypes.Cell)
	if !isArray {
		return nil
	}
	strs := make([]string, 0, len(elems))
	for i := range elems {
		s, isString := elems[i].V.(string)
		if !isString {
			continue
		}
		strs = append(strs, s)
	}
	return strs
}
// executeScriptStatement runs one SQL string on the engine and drains the
// response stream. The SQL is first passed through query.PrepareEngineSQL
// (which receives useLegacy); the engine request itself is always sent with
// UseLegacySql=false. It returns the result schema, rows, statement type and
// emulator route, or the first error from preparation, execution or
// streaming.
func executeScriptStatement(
	ctx context.Context,
	deps Dependencies,
	projectID, defaultDataset, sql string,
	useLegacy bool,
) (*enginepb.TableSchema, []bqtypes.Row, string, string, error) {
	prepared, err := query.PrepareEngineSQL(useLegacy, sql, projectID, defaultDataset)
	if err != nil {
		return nil, nil, "", "", err
	}

	stream, err := deps.Query.ExecuteQuery(ctx, &enginepb.QueryRequest{
		ProjectId:        projectID,
		DefaultDatasetId: defaultDataset,
		Sql:              prepared,
		UseLegacySql:     false,
	})
	if err != nil {
		return nil, nil, "", "", err
	}

	schema, _, rows, statementType, emulatorRoute, _, streamErr := drainSyncStream(stream)
	if streamErr != nil {
		return nil, nil, "", "", streamErr
	}
	return schema, rows, statementType, emulatorRoute, nil
}
// stampChildJobParent records parentID as the parent of job, both in the
// job's statistics and on the job itself.
func stampChildJobParent(job *jobs.Job, parentID string) {
	job.Statistics.ParentJobID = parentID
	job.ParentJobID = parentID
}
// scriptExecOutcome summarizes a script run for the parent job: how many
// child jobs were registered and the result that the parent should expose.
type scriptExecOutcome struct {
// childCount is the number of child jobs registered for the script.
childCount int
// finalSchema and finalRows hold the result surfaced on the parent job.
finalSchema *enginepb.TableSchema
finalRows []bqtypes.Row
// finalStmtType is the statement type reported with the final result.
finalStmtType string
// finalRoute is the emulator route reported with the final result.
finalRoute string
}
// declareToCreateConstant lowers DECLARE to CREATE CONSTANT for the engine's
// AnalyzeNextStatement script loop (DECLARE is script-only parse syntax).
//
// Statements that do not start with DECLARE are returned with "--" comment
// lines removed and otherwise unchanged. For a DECLARE:
//
//   - with a DEFAULT clause, the result is "CREATE CONSTANT name = default";
//     the type is optional in that form ("DECLARE x DEFAULT 5"), so only the
//     name is required;
//   - without a DEFAULT clause, the result is
//     "CREATE CONSTANT name = CAST(NULL AS type)".
//
// A DECLARE that has neither a usable name nor a type is returned as is.
func declareToCreateConstant(stmt string) string {
	trim := trimLeadingSQLComments(strings.TrimSpace(stmt))
	if !strings.HasPrefix(strings.ToUpper(trim), "DECLARE ") {
		return trim
	}
	rest := strings.TrimSpace(trim[len("DECLARE "):])
	rest = strings.TrimSuffix(rest, ";")

	defaultPart := ""
	if idx := strings.Index(strings.ToUpper(rest), " DEFAULT "); idx >= 0 {
		defaultPart = strings.TrimSpace(rest[idx+len(" DEFAULT "):])
		rest = strings.TrimSpace(rest[:idx])
	}

	if defaultPart != "" {
		// The declared type may be omitted when a default is given, so take
		// the first whitespace-delimited token as the name.
		tokens := strings.Fields(rest)
		if len(tokens) == 0 {
			return trim
		}
		return fmt.Sprintf("CREATE CONSTANT %s = %s", tokens[0], defaultPart)
	}

	before, after, ok := strings.Cut(rest, " ")
	if !ok {
		return trim
	}
	name := strings.TrimSpace(before)
	typeName := strings.TrimSpace(after)
	return fmt.Sprintf("CREATE CONSTANT %s = CAST(NULL AS %s)", name, typeName)
}
// transformScriptDeclares prepares a script for single round-trip engine
// execution. An outer BEGIN ... END wrapper is removed, the body is split
// into statements, each DECLARE is lowered via declareToCreateConstant, and
// the statements are re-joined with ";\n".
//
// Scripts that need the GoogleSQL script executor are returned whole (only
// whitespace-trimmed); a body with no statements is returned as unwrapped.
func transformScriptDeclares(sql string) string {
	inner := unwrapBeginEndBlock(sql)
	// Control-flow scripts must reach googlesql::ScriptExecutor with DECLARE
	// syntax and intact IF/WHILE bodies. Per-statement splitting breaks
	// semicolons inside THEN/ELSE branches.
	if scriptNeedsGoogleSQLExecutor(inner) {
		return strings.TrimSpace(sql)
	}
	parts := splitScriptStatements(inner)
	if len(parts) == 0 {
		return inner
	}
	var joined strings.Builder
	for i, stmt := range parts {
		if i > 0 {
			joined.WriteString(";\n")
		}
		joined.WriteString(declareToCreateConstant(stmt))
	}
	return joined.String()
}
// runLegacySplitScript executes a script statement by statement on the
// gateway side, registering one child job under parent for every executed
// statement.
//
// DECLARE statements only register the variable name and run nothing. SET
// statements execute their right-hand side; when the result is a one-cell
// array of strings it becomes the variable's value. Query statements have
// UNNEST(var) references substituted with the collected values before they
// run, and the last query's result is reported as the script's final result.
//
// Execution stops at the first failing statement. That statement's child
// job is finalized as failed before the error is returned, so it does not
// remain pending.
func runLegacySplitScript(
	ctx context.Context,
	deps Dependencies,
	r *http.Request,
	projectID string,
	parent *jobs.Job,
	posted *jobs.Job,
	cfg *jobs.JobConfiguration,
	sql string,
	defaultDataset string,
	useLegacy bool,
) (*scriptExecOutcome, error) {
	vars := make(map[string][]string)
	out := &scriptExecOutcome{}
	for _, raw := range splitScriptStatements(unwrapBeginEndBlock(sql)) {
		st := classifyScriptStatement(raw)
		if st.kind == scriptStmtDeclare {
			vars[st.name] = nil
			continue
		}

		stmtSQL := st.sql
		if st.kind == scriptStmtQuery {
			stmtSQL = substituteScriptVars(stmtSQL, vars)
		}

		// Clone the posted job and configuration so the child gets a fresh
		// job ID and carries only this statement's SQL.
		childPosted := *posted
		childPosted.JobReference.JobID = ""
		childCfg := *cfg
		qCopy := *cfg.Query
		qCopy.Query = stmtSQL
		childCfg.Query = &qCopy
		child := newPendingJob(deps, projectID, &childPosted, &childCfg)
		stampChildJobParent(child, parent.JobReference.JobID)

		childStart := time.Now().UTC()
		schema, rows, statementType, emulatorRoute, err := executeScriptStatement(
			ctx, deps, projectID, defaultDataset, stmtSQL, useLegacy)
		if err != nil {
			finalizeFailedJob(deps, child, childStart, err)
			stampChildJobParent(child, parent.JobReference.JobID)
			return nil, err
		}
		if st.kind == scriptStmtSet && st.name != "" {
			if arr := arrayFromRow(rows); len(arr) > 0 {
				vars[st.name] = arr
			}
		}
		childEnd := time.Now().UTC()
		finalizeDoneJob(deps, child, childStart, childEnd,
			schema, nil, rows, statementType, emulatorRoute, nil, nil, nil, r)
		// Re-stamp the parent link after finalization, as the original flow does.
		stampChildJobParent(child, parent.JobReference.JobID)
		out.childCount++

		if st.kind == scriptStmtQuery {
			out.finalSchema = schema
			out.finalRows = rows
			out.finalStmtType = statementType
			out.finalRoute = emulatorRoute
		}
	}
	return out, nil
}
// runScriptStatements executes a script through one of two paths: the
// statement-by-statement legacy splitter, or a single engine round-trip when
// needsEngineScriptExecution says the script requires it.
func runScriptStatements(
	ctx context.Context,
	deps Dependencies,
	r *http.Request,
	projectID string,
	parent *jobs.Job,
	posted *jobs.Job,
	cfg *jobs.JobConfiguration,
	sql string,
	defaultDataset string,
	useLegacy bool,
) (*scriptExecOutcome, error) {
	if !needsEngineScriptExecution(sql) {
		return runLegacySplitScript(
			ctx, deps, r, projectID, parent, posted, cfg,
			sql, defaultDataset, useLegacy)
	}
	return runEngineScript(
		ctx, deps, r, projectID, parent, posted, cfg,
		defaultDataset, sql, useLegacy)
}
// finalizeScriptParentJob marks the parent script job as done, stamps its
// start/end times and child-job count, and, when the script produced a final
// schema or rows, attaches that result. The statement type is copied into
// the job's query statistics only when it is non-empty.
func finalizeScriptParentJob(
	parent *jobs.Job,
	parentStart, parentEnd time.Time,
	out *scriptExecOutcome,
) {
	parent.Status.State = jobs.JobStateDone
	parent.Statistics.StartTime = millisString(parentStart)
	parent.Statistics.EndTime = millisString(parentEnd)
	parent.Statistics.NumChildJobs = strconv.Itoa(out.childCount)

	if out.finalRows == nil && out.finalSchema == nil {
		return
	}
	parent.Result = &jobs.QueryResult{
		Schema:        schemaFromProto(out.finalSchema),
		Rows:          out.finalRows,
		StatementType: out.finalStmtType,
		EmulatorRoute: out.finalRoute,
	}
	if out.finalStmtType == "" {
		return
	}
	parent.Statistics.Query = &bqtypes.JobStatistics2{StatementType: out.finalStmtType}
}
// runSyncScriptQueryInsert executes DECLARE/SET/SELECT scripts and
// registers a parent job plus per-statement child jobs.
//
// On failure the parent job is finalized as failed and an error response is
// written: the gRPC-mapped one when queryGRPCToHTTPError handles the error,
// otherwise 400 invalidQuery. On success the finalized parent job is written
// as JSON with status 200.
func runSyncScriptQueryInsert(
	deps Dependencies,
	w http.ResponseWriter,
	r *http.Request,
	posted *jobs.Job,
	cfg *jobs.JobConfiguration,
) {
	projectID := r.PathValue("projectId")
	parent := newPendingJob(deps, projectID, posted, cfg)
	startedAt := time.Now().UTC()

	useLegacy := cfg.Query.UseLegacySQL != nil && *cfg.Query.UseLegacySQL
	defaultDataset := resolveDefaultDataset(deps, cfg.Query.DefaultDataset)

	outcome, err := runScriptStatements(
		r.Context(), deps, r, projectID, parent, posted, cfg,
		cfg.Query.Query, defaultDataset, useLegacy)
	if err != nil {
		finalizeFailedJob(deps, parent, startedAt, err)
		if !queryGRPCToHTTPError(w, err) {
			writeError(w, http.StatusBadRequest, reasonInvalidQuery, err.Error())
		}
		return
	}

	finalizeScriptParentJob(parent, startedAt, time.Now().UTC(), outcome)
	writeJSON(w, http.StatusOK, parent)
}
// runQueryScriptExecute handles the jobs.query path for multi-statement
// scripts (client.query uses jobs.query when the request body is simple).
//
// It synthesizes a posted job and query configuration from req, runs the
// script, finalizes the parent job, and writes a query response built from
// the script's final result. On failure the parent job is finalized as
// failed and an error response is written instead.
func runQueryScriptExecute(
deps Dependencies,
w http.ResponseWriter,
r *http.Request,
req *bqtypes.QueryRequest,
defaultDataset string,
) {
projectID := r.PathValue("projectId")
parentStart := time.Now().UTC()
// jobs.query carries no Job resource, so build the minimal one the shared
// script runner needs.
posted := &jobs.Job{JobReference: bqtypes.JobReference{
ProjectID: projectID,
Location: req.Location,
}}
cfg := &jobs.JobConfiguration{
JobType: jobConfigurationKindQuery,
Query: &jobs.JobConfigurationQuery{Query: req.Query},
}
parent := newPendingJob(deps, projectID, posted, cfg)
useLegacy := req.UseLegacySQL != nil && *req.UseLegacySQL
out, err := runScriptStatements(
r.Context(), deps, r, projectID, parent, posted, cfg,
req.Query, defaultDataset, useLegacy)
if err != nil {
finalizeFailedJob(deps, parent, parentStart, err)
// Prefer the gRPC-derived HTTP error; fall back to 400 invalidQuery.
if queryGRPCToHTTPError(w, err) {
return
}
writeError(w, http.StatusBadRequest, reasonInvalidQuery, err.Error())
return
}
parentEnd := time.Now().UTC()
finalizeScriptParentJob(parent, parentStart, parentEnd, out)
restSchema := schemaFromProto(out.finalSchema)
// The emulator route is only included in the response for loopback requests.
visibleRoute := ""
if middleware.IsLoopback(r.Context()) {
visibleRoute = out.finalRoute
}
sessionInfo := sessionStore(&deps).Resolve(
projectID, req.Location, req.CreateSession, req.ConnProperties)
stampJobSessionInfo(parent, sessionInfo)
outResp := assembleQueryResponse(
parent, restSchema, out.finalRows, nil, nil,
out.finalStmtType, visibleRoute, nil, nil, sessionInfo)
writeJSON(w, http.StatusOK, outResp)
}
package handlers
import (
"context"
"net/http"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
)
// engineScriptFinalResult bundles the result of a whole-script engine
// execution so it can be attached to the child job that represents the
// script's final statement.
type engineScriptFinalResult struct {
// schema and rows are the result returned by the engine for the script.
schema *enginepb.TableSchema
rows []bqtypes.Row
// statementType is the statement type reported by the engine.
statementType string
// emulatorRoute is the emulator route reported by the engine.
emulatorRoute string
}
// registerEngineScriptChildJobs registers child jobs for a script that the
// engine has already executed in one round-trip, and returns how many were
// registered.
//
// Scripts that need the GoogleSQL script executor get a single child job
// carrying the final result, or none when there is neither schema nor rows.
// Otherwise the script is split into statements: DECLARE and CALL produce no
// child job, the last query statement gets a child carrying the final
// result, and every SET or earlier query statement is re-executed on its own
// to produce its child job.
func registerEngineScriptChildJobs(
	ctx context.Context,
	deps Dependencies,
	r *http.Request,
	projectID, defaultDataset string,
	useLegacy bool,
	parent *jobs.Job,
	posted *jobs.Job,
	cfg *jobs.JobConfiguration,
	sql string,
	final engineScriptFinalResult,
) int {
	inner := unwrapBeginEndBlock(sql)
	if scriptNeedsGoogleSQLExecutor(inner) {
		if final.schema == nil && len(final.rows) == 0 {
			return 0
		}
		registerFinalSelectChildJob(
			deps, r, projectID, parent, posted, cfg, final)
		return 1
	}

	// Classify every statement once and remember where the last query is.
	raws := splitScriptStatements(inner)
	classified := make([]scriptStatement, len(raws))
	lastQueryIdx := -1
	for i, raw := range raws {
		classified[i] = classifyScriptStatement(raw)
		if classified[i].kind == scriptStmtQuery {
			lastQueryIdx = i
		}
	}

	childCount := 0
	for i, st := range classified {
		if st.kind != scriptStmtSet && st.kind != scriptStmtQuery {
			continue
		}
		if st.kind == scriptStmtQuery && i == lastQueryIdx {
			registerFinalSelectChildJob(
				deps, r, projectID, parent, posted, cfg, final)
		} else {
			registerReExecutedEngineScriptChild(
				ctx, deps, r, projectID, defaultDataset, useLegacy,
				parent, posted, cfg, st.sql)
		}
		childCount++
	}
	return childCount
}
// registerReExecutedEngineScriptChild creates a child job for stmtSQL under
// parent, executes the statement on its own, and finalizes the child as done
// or failed according to the outcome. Errors are recorded on the child job
// only; nothing is returned to the caller.
func registerReExecutedEngineScriptChild(
	ctx context.Context,
	deps Dependencies,
	r *http.Request,
	projectID, defaultDataset string,
	useLegacy bool,
	parent *jobs.Job,
	posted *jobs.Job,
	cfg *jobs.JobConfiguration,
	stmtSQL string,
) {
	// Copy the posted job and configuration so the child gets a fresh job ID
	// and carries only this statement's SQL.
	postedCopy := *posted
	postedCopy.JobReference.JobID = ""
	cfgCopy := *cfg
	queryCopy := *cfg.Query
	queryCopy.Query = stmtSQL
	cfgCopy.Query = &queryCopy

	child := newPendingJob(deps, projectID, &postedCopy, &cfgCopy)
	parentID := parent.JobReference.JobID
	stampChildJobParent(child, parentID)

	startedAt := time.Now().UTC()
	schema, rows, statementType, emulatorRoute, err := executeScriptStatement(
		ctx, deps, projectID, defaultDataset, stmtSQL, useLegacy)
	endedAt := time.Now().UTC()

	if err != nil {
		finalizeFailedJob(deps, child, startedAt, err)
	} else {
		finalizeDoneJob(deps, child, startedAt, endedAt,
			schema, nil, rows, statementType, emulatorRoute, nil, nil, nil, r)
	}
	// Both outcomes re-stamp the parent link after finalization.
	stampChildJobParent(child, parentID)
}
// registerFinalSelectChildJob registers a completed child job under parent
// that carries the already-computed final result. Nothing is executed here;
// the child's start and end times are both taken at registration, and its
// query text is the configuration's original query.
func registerFinalSelectChildJob(
	deps Dependencies,
	r *http.Request,
	projectID string,
	parent *jobs.Job,
	posted *jobs.Job,
	cfg *jobs.JobConfiguration,
	final engineScriptFinalResult,
) {
	postedCopy := *posted
	postedCopy.JobReference.JobID = ""
	cfgCopy := *cfg
	queryCopy := *cfg.Query
	cfgCopy.Query = &queryCopy

	child := newPendingJob(deps, projectID, &postedCopy, &cfgCopy)
	parentID := parent.JobReference.JobID
	stampChildJobParent(child, parentID)

	startedAt := time.Now().UTC()
	endedAt := time.Now().UTC()
	finalizeDoneJob(deps, child, startedAt, endedAt,
		final.schema, nil, final.rows, final.statementType, final.emulatorRoute,
		nil, nil, nil, r)
	stampChildJobParent(child, parentID)
}
// runEngineScript executes the whole script in one engine round-trip (after
// transformScriptDeclares rewrites it), then registers child jobs for the
// original script text. The engine's result becomes the script's final
// result. Any execution error is returned before child jobs are registered.
func runEngineScript(
	ctx context.Context,
	deps Dependencies,
	r *http.Request,
	projectID string,
	parent *jobs.Job,
	posted *jobs.Job,
	cfg *jobs.JobConfiguration,
	defaultDataset, sql string,
	useLegacy bool,
) (*scriptExecOutcome, error) {
	schema, rows, statementType, emulatorRoute, err := executeScriptStatement(
		ctx, deps, projectID, defaultDataset, transformScriptDeclares(sql), useLegacy)
	if err != nil {
		return nil, err
	}

	final := engineScriptFinalResult{
		schema:        schema,
		rows:          rows,
		statementType: statementType,
		emulatorRoute: emulatorRoute,
	}
	outcome := &scriptExecOutcome{
		finalSchema:   schema,
		finalRows:     rows,
		finalStmtType: statementType,
		finalRoute:    emulatorRoute,
	}
	outcome.childCount = registerEngineScriptChildJobs(
		ctx, deps, r, projectID, defaultDataset, useLegacy,
		parent, posted, cfg, sql, final)
	return outcome, nil
}
package handlers
import "strings"
// scriptNeedsGoogleSQLExecutor mirrors
// backend/engine/coordinator/script_executor_internal.cc so the gateway
// preserves DECLARE syntax for scripts routed through
// googlesql::ScriptExecutor (CREATE CONSTANT lowering breaks IF/WHILE scope).
//
// The check is a case-insensitive keyword scan of the comment-stripped
// script: it is true when the script starts with, or contains, a
// control-flow or dynamic-SQL keyword (IF, WHILE, LOOP, REPEAT, FOR ... IN,
// RAISE, EXCEPTION, EXECUTE IMMEDIATE).
func scriptNeedsGoogleSQLExecutor(sql string) bool {
	detected := sqlForScriptDetection(sql)

	head := strings.ToUpper(strings.TrimSpace(detected))
	leading := []string{
		"IF ", "WHILE ", "LOOP ", "REPEAT", "FOR ", "RAISE ",
		"EXECUTE IMMEDIATE", "EXCEPTION",
	}
	for _, kw := range leading {
		if strings.HasPrefix(head, kw) {
			return true
		}
	}

	body := strings.ToUpper(detected)
	embedded := []string{
		" IF ", "\nIF ",
		" WHILE ", "\nWHILE ",
		" LOOP ", "\nLOOP ",
		" REPEAT", "\nREPEAT",
		"EXCEPTION",
		" RAISE ", "\nRAISE ",
		"EXECUTE IMMEDIATE",
	}
	for _, marker := range embedded {
		if strings.Contains(body, marker) {
			return true
		}
	}

	// FOR only counts as a loop when an IN keyword is also present.
	hasFor := strings.Contains(body, " FOR ") || strings.Contains(body, "\nFOR ")
	return hasFor && strings.Contains(body, " IN ")
}
package handlers
import (
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/session"
)
// sessionStore returns deps.Sessions, creating the store with
// NewSessionStore and saving it on deps first when it is nil.
func sessionStore(deps *Dependencies) *session.Store {
	if deps.Sessions != nil {
		return deps.Sessions
	}
	deps.Sessions = NewSessionStore()
	return deps.Sessions
}
// queryJobConnectionProperties returns the connection properties of a query
// job configuration, or nil when cfg or its Query section is nil.
func queryJobConnectionProperties(cfg *jobs.JobConfiguration) []bqtypes.ConnectionProperty {
	if cfg != nil && cfg.Query != nil {
		return cfg.Query.ConnectionProperties
	}
	return nil
}
// queryJobCreateSession reports whether the query job configuration has
// CreateSession set. A nil cfg or nil Query section yields false.
func queryJobCreateSession(cfg *jobs.JobConfiguration) bool {
	if cfg == nil || cfg.Query == nil {
		return false
	}
	return cfg.Query.CreateSession
}
// stampJobSessionInfo stores info in the job's statistics. It does nothing
// when either job or info is nil.
func stampJobSessionInfo(job *jobs.Job, info *bqtypes.SessionInfo) {
	if job != nil && info != nil {
		job.Statistics.SessionInfo = info
	}
}
package handlers
import (
	"context"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"maps"
	"net/http"
	"net/url"
	"slices"
	"strconv"
	"strings"

	"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
	"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
const (
	// tableDataInsertAllKind is the `kind` field BigQuery uses on the
	// success response of tabledata.insertAll. See
	// docs/bigquery/docs/reference/rest/v2/tabledata/insertAll.md.
	tableDataInsertAllKind = "bigquery#tableDataInsertAllResponse"

	// tableDataListKind is the `kind` field for a tabledata.list response.
	// See docs/bigquery/docs/reference/rest/v2/tabledata/list.md.
	tableDataListKind = "bigquery#tableDataList"

	// tableDataListDefaultMaxResults bounds the page size when the
	// caller does not specify `maxResults`. Matches what most BigQuery
	// client libraries pick on their own (the public API itself does not
	// document a server-side default).
	tableDataListDefaultMaxResults = 10000

	// tableDataListMaxResultsCap is the upper bound honored for maxResults.
	tableDataListMaxResultsCap = 100000
)
// decodeInsertAllBody parses the JSON body of tabledata.insertAll
// into the wire-shape struct. An empty body is accepted and yields the
// zero-value request. When the body cannot be read or is not valid JSON, a
// 400 "invalid" error is written to w and ok is false.
func decodeInsertAllBody(w http.ResponseWriter, r *http.Request) (bqtypes.TableDataInsertAllRequest, bool) {
	var parsed bqtypes.TableDataInsertAllRequest

	raw, readErr := io.ReadAll(r.Body)
	switch {
	case readErr != nil:
		writeError(w, http.StatusBadRequest, "invalid",
			"Could not read tabledata.insertAll request body: "+readErr.Error())
		return parsed, false
	case len(raw) == 0:
		return parsed, true
	}

	if jsonErr := json.Unmarshal(raw, &parsed); jsonErr != nil {
		writeError(w, http.StatusBadRequest, "invalid",
			"Could not parse tabledata.insertAll request body as JSON: "+jsonErr.Error())
		return parsed, false
	}
	return parsed, true
}
// jsonCellForField converts a JSON-decoded value into a proto Cell, guided
// by the field schema f:
//
//   - a REPEATED field given a JSON array becomes an Array whose elements
//     are converted against the same field with the repeated mode removed;
//   - a STRUCT/RECORD field given a JSON object becomes a Struct with one
//     cell per schema sub-field, in schema order, with NULL for absent keys;
//   - everything else (including a nil schema or a value whose JSON shape
//     does not match the schema) is converted by jsonToCell.
//
// jsonToCell converts a JSON-decoded value into a proto Cell using
// BigQuery's REST f/v wire shape conventions:
//
//   - nil -> Cell.null_value = true
//   - bool -> "true"/"false" string
//   - json.Number -> decimal string verbatim
//   - float64/int -> formatted decimal string (BigQuery's REST
//     surface stringifies numerics, including INT64, NUMERIC,
//     BIGNUMERIC; only FLOAT64 stays a JSON number on the wire,
//     but the engine still stores it as a string)
//   - string -> string verbatim
//   - []byte -> base64 encoded string (BYTES wire shape)
//   - []interface{}-> Array of converted cells
//   - map[string]any -> Struct with fields in iteration order;
//     used when no schema is available
//
// The conversion is intentionally lossy: a `Cell.string_value` is
// enough to round-trip through Storage::Value::String on the engine
// side because the catalog/storage path only requires the bytes to
// come back out shape-preserved. Typing tightens later via the
// resolved AST.
func jsonCellForField(f *enginepb.FieldSchema, v any) *enginepb.Cell {
	switch {
	case f == nil:
		return jsonToCell(v)

	case isJSONRepeatedFieldMode(f.GetMode()):
		items, isList := v.([]any)
		if !isList {
			return jsonToCell(v)
		}
		elemSchema := jsonRepeatedElementSchema(f)
		cells := make([]*enginepb.Cell, 0, len(items))
		for _, item := range items {
			cells = append(cells, jsonCellForField(elemSchema, item))
		}
		return &enginepb.Cell{Value: &enginepb.Cell_Array{
			Array: &enginepb.Array{Elements: cells},
		}}

	case isJSONStructFieldType(f.GetType()):
		obj, isObject := v.(map[string]any)
		if !isObject {
			return jsonToCell(v)
		}
		subFields := f.GetFields()
		cells := make([]*enginepb.Cell, 0, len(subFields))
		for _, sub := range subFields {
			if subV, present := obj[sub.GetName()]; present {
				cells = append(cells, jsonCellForField(sub, subV))
			} else {
				cells = append(cells, &enginepb.Cell{
					Value: &enginepb.Cell_NullValue{NullValue: true},
				})
			}
		}
		return &enginepb.Cell{Value: &enginepb.Cell_StructValue{
			StructValue: &enginepb.Struct{Fields: cells},
		}}

	default:
		return jsonToCell(v)
	}
}
// isJSONRepeatedFieldMode reports whether mode, ignoring case and
// surrounding whitespace, equals sqlModeRepeated.
func isJSONRepeatedFieldMode(mode string) bool {
	normalized := strings.TrimSpace(mode)
	return strings.EqualFold(normalized, sqlModeRepeated)
}
// jsonRepeatedElementSchema returns the schema used for one element of a
// repeated field: a copy of f's name, type, description and sub-fields with
// the mode left unset. A nil f yields nil.
func jsonRepeatedElementSchema(f *enginepb.FieldSchema) *enginepb.FieldSchema {
	if f == nil {
		return nil
	}
	elem := &enginepb.FieldSchema{}
	elem.Name = f.GetName()
	elem.Type = f.GetType()
	elem.Description = f.GetDescription()
	elem.Fields = f.GetFields()
	return elem
}
// isJSONStructFieldType reports whether t, upper-cased and trimmed, is one
// of the struct type names sqlTypeSTRUCT or sqlTypeRECORD.
func isJSONStructFieldType(t string) bool {
	normalized := strings.ToUpper(strings.TrimSpace(t))
	return normalized == sqlTypeSTRUCT || normalized == sqlTypeRECORD
}
// jsonToCell converts a JSON-decoded value into a proto Cell without any
// schema guidance:
//
//   - nil becomes a NULL cell;
//   - bool becomes the string "true" or "false";
//   - json.Number and string are stored verbatim;
//   - float64 is stored as a bare integer when it is integer-valued and
//     within the int64 range, otherwise in shortest 'g' float notation;
//   - int and int64 are stored in decimal;
//   - []byte is stored base64-encoded;
//   - []any becomes an Array of converted elements;
//   - map[string]any becomes a Struct whose fields are ordered by sorted key,
//     so the positional layout is deterministic across calls;
//   - any other type is stored using fmt's %v formatting.
func jsonToCell(v any) *enginepb.Cell {
	if v == nil {
		return &enginepb.Cell{Value: &enginepb.Cell_NullValue{NullValue: true}}
	}
	switch val := v.(type) {
	case bool:
		if val {
			return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: "true"}}
		}
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: "false"}}
	case json.Number:
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: string(val)}}
	case float64:
		// json.Decode produces float64 for any unmarshaled number when
		// Decoder.UseNumber isn't set. Keep integer-valued floats as bare
		// integers (1.0 -> "1") and preserve precision for genuine
		// fractions. The range guard comes first because converting an
		// out-of-range float to int64 is implementation-dependent in Go.
		const int64Bound = 1 << 63 // 2^63, exactly representable as float64
		if val >= -int64Bound && val < int64Bound && val == float64(int64(val)) {
			return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
				StringValue: strconv.FormatInt(int64(val), 10),
			}}
		}
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
			StringValue: strconv.FormatFloat(val, 'g', -1, 64),
		}}
	case int:
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
			StringValue: strconv.Itoa(val),
		}}
	case int64:
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
			StringValue: strconv.FormatInt(val, 10),
		}}
	case string:
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: val}}
	case []byte:
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
			StringValue: base64.StdEncoding.EncodeToString(val),
		}}
	case []any:
		arr := &enginepb.Array{Elements: make([]*enginepb.Cell, 0, len(val))}
		for _, el := range val {
			arr.Elements = append(arr.Elements, jsonToCell(el))
		}
		return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: arr}}
	case map[string]any:
		// Struct cells are positional, so map iteration order must not leak
		// into the output: emit fields in sorted-key order.
		st := &enginepb.Struct{Fields: make([]*enginepb.Cell, 0, len(val))}
		for _, key := range slices.Sorted(maps.Keys(val)) {
			st.Fields = append(st.Fields, jsonToCell(val[key]))
		}
		return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: st}}
	default:
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
			StringValue: fmt.Sprintf("%v", val),
		}}
	}
}
// jsonRowToProto converts one insertAll JSON row into a proto DataRow
// by laying its fields out in the column order described by the
// table's gRPC schema. Missing fields become NULL cells so the cell
// count always matches the column count Storage::AppendRows expects.
// Extra fields not present in the schema are dropped (BigQuery's
// ignoreUnknownValues=false is approximated here by always ignoring;
// stricter semantics land alongside row-level validation in the
// query-execution work).
func jsonRowToProto(schema *enginepb.TableSchema, row map[string]any) *enginepb.DataRow {
	columns := schema.GetFields()
	cells := make([]*enginepb.Cell, 0, len(columns))
	for _, col := range columns {
		if v, present := row[col.GetName()]; present {
			cells = append(cells, jsonCellForField(col, v))
			continue
		}
		cells = append(cells, &enginepb.Cell{
			Value: &enginepb.Cell_NullValue{NullValue: true},
		})
	}
	return &enginepb.DataRow{Cells: cells}
}
// TableDataInsertAll implements `bigquery.tabledata.insertAll`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/insertAll
//
// Flow: decode the JSON body, look up the destination table's schema
// over Catalog.DescribeTable (so we know the column order), convert
// each row's `json` map into a proto DataRow, and forward the batch
// to Catalog.InsertRows in one shot. InsertRows is skipped when the
// request carries no rows. A successful response is the standard
// `bigquery#tableDataInsertAllResponse` envelope; gRPC failures from
// either catalog call are mapped to HTTP errors by grpcToHTTPError.
//
// See docs/bigquery/docs/reference/rest/v2/tabledata/insertAll.md for
// the full request/response shapes the emulator targets.
func TableDataInsertAll(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID := tableIDFromPath(r)
		if tableID == "" {
			writeError(w, http.StatusBadRequest, "invalid",
				"tableId is required")
			return
		}
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}
		body, ok := decodeInsertAllBody(w, r)
		if !ok {
			return
		}

		table := &enginepb.TableRef{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		}
		desc, err := deps.Catalog.DescribeTable(r.Context(),
			&enginepb.DescribeTableRequest{Table: table})
		if grpcToHTTPError(w, err) {
			return
		}

		schema := desc.GetSchema()
		protoRows := make([]*enginepb.DataRow, 0, len(body.Rows))
		for _, row := range body.Rows {
			protoRows = append(protoRows, jsonRowToProto(schema, row.JSON))
		}

		if len(protoRows) > 0 {
			_, err = deps.Catalog.InsertRows(r.Context(), &enginepb.InsertRowsRequest{
				Table: table,
				Rows:  protoRows,
			})
			if grpcToHTTPError(w, err) {
				return
			}
		}
		writeJSON(w, http.StatusOK, bqtypes.TableDataInsertAllResponse{
			Kind: tableDataInsertAllKind,
		})
	}
}
// tableDataListParams holds parsed tabledata.list query parameters.
type tableDataListParams struct {
// startIndex is the zero-based index of the first row to return.
startIndex int64
// maxResults is the page size; zero makes the listing return totals only.
maxResults int64
// selectedFields lists the top-level column names to project; empty means
// all columns.
selectedFields []string
// useInt64Timestamp mirrors formatOptions.useInt64Timestamp.
useInt64Timestamp bool
}
// TableDataList implements `bigquery.tabledata.list`:
//
//	GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/data
//
// Pagination is honored via the documented `startIndex`, `maxResults`,
// and `pageToken` query parameters: pageToken (when supplied) is a
// decimal string encoding the next start row index, mirroring the
// `next_start_index` returned from the engine's ListRows.
// `selectedFields` projects top-level columns (dotted paths select the
// top-level STRUCT field). `formatOptions.useInt64Timestamp` controls
// TIMESTAMP JSON encoding. Logical views have no Parquet backing;
// tabledata.list returns empty rows — use jobs.query for view preview.
//
// See docs/bigquery/docs/reference/rest/v2/tabledata/list.md.
func TableDataList(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID := tableIDFromPath(r)
		switch {
		case tableID == "":
			writeError(w, http.StatusBadRequest, "invalid",
				"tableId is required")
			return
		case deps.Catalog == nil:
			NotImplemented(w, r)
			return
		}

		query := r.URL.Query()
		startIndex, maxResults, pagingOK := tableDataListPaging(w, query)
		if !pagingOK {
			return
		}
		listParams, paramsOK := parseTableDataListParams(query, startIndex, maxResults)
		if !paramsOK {
			return
		}

		list, err := buildTableDataList(r.Context(), deps, projectID, datasetID, tableID, listParams)
		if grpcToHTTPError(w, err) {
			return
		}
		writeJSON(w, http.StatusOK, list)
	}
}
// parseTableDataListParams combines the already-parsed paging values with
// the `selectedFields` and `formatOptions.useInt64Timestamp` query
// parameters. selectedFields is a comma-separated list; blank entries are
// skipped and dotted paths are reduced to the part before the first dot.
// useInt64Timestamp is enabled by "1", "true", "t" or "yes" in any case.
// The boolean result is always true.
func parseTableDataListParams(
	q url.Values,
	startIndex, maxResults int64,
) (tableDataListParams, bool) {
	params := tableDataListParams{
		startIndex: startIndex,
		maxResults: maxResults,
	}

	selected := strings.TrimSpace(q.Get("selectedFields"))
	if selected != "" {
		for _, field := range strings.Split(selected, ",") {
			field = strings.TrimSpace(field)
			if field == "" {
				continue
			}
			// BigQuery paths like "e.d.f" select into nested fields; the
			// gateway projects at top-level granularity (field before ".").
			top, _, _ := strings.Cut(field, ".")
			params.selectedFields = append(params.selectedFields, top)
		}
	}

	flag := strings.ToLower(strings.TrimSpace(q.Get("formatOptions.useInt64Timestamp")))
	params.useInt64Timestamp = flag == "1" || flag == "true" || flag == "t" || flag == "yes"
	return params, true
}
// tableDataListPaging parses the paging query parameters of tabledata.list.
// startIndex defaults to 0 and is overridden by pageToken when one is
// supplied; maxResults defaults to tableDataListDefaultMaxResults and is
// clamped to tableDataListMaxResultsCap. When any parameter is malformed,
// the error response has already been written and ok is false.
func tableDataListPaging(w http.ResponseWriter, q url.Values) (startIndex, maxResults int64, ok bool) {
	start, valid := parsePositiveInt64(w, q.Get("startIndex"), "startIndex", 0)
	if !valid {
		return 0, 0, false
	}
	if token := q.Get("pageToken"); token != "" {
		start, valid = parsePositiveInt64(w, token, "pageToken", 0)
		if !valid {
			return 0, 0, false
		}
	}
	limit, valid := parsePositiveInt64(w, q.Get("maxResults"), "maxResults", tableDataListDefaultMaxResults)
	if !valid {
		return 0, 0, false
	}
	return start, min(limit, tableDataListMaxResultsCap), true
}
// buildTableDataList assembles the tabledata.list response for one table.
// It describes the table to obtain its schema, then either returns only the
// totals (when params.maxResults is 0) or lists one page of rows, converts
// them to the REST row shape, and projects the selected fields. Errors from
// the catalog calls are returned unchanged.
func buildTableDataList(
ctx context.Context,
deps Dependencies,
projectID, datasetID, tableID string,
params tableDataListParams,
) (bqtypes.TableDataList, error) {
table := &enginepb.TableRef{
ProjectId: projectID,
DatasetId: datasetID,
TableId: tableID,
}
desc, err := deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: table})
if err != nil {
return bqtypes.TableDataList{}, err
}
schema := desc.GetSchema()
formatOpts := bqtypes.WireFormatOptions{UseInt64Timestamp: params.useInt64Timestamp}
// maxResults=0: report the row count and etag without returning any rows.
if params.maxResults == 0 {
total, totalErr := tableDataListTotalRows(ctx, deps.Catalog, table)
if totalErr != nil {
return bqtypes.TableDataList{}, totalErr
}
return bqtypes.TableDataList{
Kind: tableDataListKind,
Etag: tableDataListEtag(schema, total),
TotalRows: strconv.FormatInt(total, 10),
}, nil
}
resp, err := deps.Catalog.ListRows(ctx, &enginepb.ListRowsRequest{
Table: table,
StartIndex: params.startIndex,
MaxResults: params.maxResults,
})
if err != nil {
return bqtypes.TableDataList{}, err
}
out := bqtypes.TableDataList{
Kind: tableDataListKind,
Etag: tableDataListEtag(schema, resp.GetTotalRows()),
TotalRows: strconv.FormatInt(resp.GetTotalRows(), 10),
}
// Emit a page token only while rows remain past this page; the token is
// the decimal next start index.
if resp.GetNextStartIndex() < resp.GetTotalRows() && params.maxResults > 0 {
out.PageToken = strconv.FormatInt(resp.GetNextStartIndex(), 10)
}
fieldIdx := selectedFieldIndices(schema, params.selectedFields)
out.Rows = make([]bqtypes.Row, 0, len(resp.GetRows()))
for _, row := range resp.GetRows() {
// Convert against the full schema first, then project the selected columns.
full := bqtypes.CellsToRowForSchema(row.GetCells(), schema, formatOpts)
out.Rows = append(out.Rows, projectRowFields(full, fieldIdx))
}
return out, nil
}
// tableDataListTotalRows returns the table's total row count by issuing a
// ListRows request with StartIndex 0 and MaxResults 0 and reading the total
// from the response.
func tableDataListTotalRows(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	table *enginepb.TableRef,
) (int64, error) {
	countReq := &enginepb.ListRowsRequest{
		Table:      table,
		StartIndex: 0,
		MaxResults: 0,
	}
	resp, err := catalog.ListRows(ctx, countReq)
	if err != nil {
		return 0, err
	}
	return resp.GetTotalRows(), nil
}
// tableDataListEtag derives an etag from the table's top-level schema and
// its row count: the SHA-256 of each field's name, type and mode (each
// followed by a NUL byte) plus the decimal row count, hex-encoded and
// truncated to 32 characters.
func tableDataListEtag(schema *enginepb.TableSchema, totalRows int64) string {
	h := sha256.New()
	for _, f := range schema.GetFields() {
		for _, part := range [...]string{f.GetName(), f.GetType(), f.GetMode()} {
			_, _ = h.Write([]byte(part))
			_, _ = h.Write([]byte{0})
		}
	}
	_, _ = h.Write([]byte(strconv.FormatInt(totalRows, 10)))
	digest := hex.EncodeToString(h.Sum(nil))
	return digest[:32]
}
// selectedFieldIndices maps the requested field names onto positions
// in the schema's top-level field list. Names are matched exactly;
// unknown names and repeated names are skipped, and the result keeps
// the order in which names were requested. It returns nil when the
// schema is nil, nothing was requested, or no requested name matched.
func selectedFieldIndices(schema *enginepb.TableSchema, selected []string) []int {
	if len(selected) == 0 || schema == nil {
		return nil
	}
	fields := schema.GetFields()
	position := make(map[string]int, len(fields))
	for i, field := range fields {
		position[field.GetName()] = i
	}

	picked := make([]int, 0, len(selected))
	taken := make(map[int]struct{}, len(selected))
	for _, name := range selected {
		i, known := position[name]
		if !known {
			continue
		}
		if _, repeated := taken[i]; repeated {
			continue
		}
		taken[i] = struct{}{}
		picked = append(picked, i)
	}
	if len(picked) == 0 {
		return nil
	}
	return picked
}
// projectRowFields narrows a row to the cells at the given indices,
// in the order the indices are listed. An empty index list returns
// the row untouched; indices outside the row's cell range are skipped.
func projectRowFields(row bqtypes.Row, fieldIdx []int) bqtypes.Row {
	if len(fieldIdx) == 0 {
		return row
	}
	cells := make([]bqtypes.Cell, 0, len(fieldIdx))
	for _, i := range fieldIdx {
		if i < 0 || i >= len(row.F) {
			continue
		}
		cells = append(cells, row.F[i])
	}
	return bqtypes.Row{F: cells}
}
// parsePositiveInt64 parses a non-negative base-10 integer from a
// query parameter (zero is accepted despite the name). Empty input
// yields `defaultValue`. A value that does not parse as int64, or that
// is negative, writes a 400 envelope naming the parameter and returns
// ok=false so the caller can short-circuit.
func parsePositiveInt64(w http.ResponseWriter, raw, name string, defaultValue int64) (int64, bool) {
	if raw == "" {
		return defaultValue, true
	}
	parsed, err := strconv.ParseInt(raw, 10, 64)
	if err == nil && parsed >= 0 {
		return parsed, true
	}
	message := fmt.Sprintf("Query parameter %q must be a non-negative integer", name)
	writeError(w, http.StatusBadRequest, "invalid", message)
	return 0, false
}
package handlers
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// REST `kind` discriminators and Table.type strings used by the table
// handlers.
const (
	// tableKind is the `kind` field of a Table resource. See
	// docs/bigquery/docs/reference/rest/v2/tables/get.md.
	tableKind = "bigquery#table"

	// tableListKind is the `kind` field of a TableList response.
	tableListKind = "bigquery#tableList"

	// defaultTableType is the Table.type value for the non-view,
	// non-external tables the emulator's Catalog tracks today.
	defaultTableType = "TABLE"

	// viewTableType is the Table.type value for views created via
	// tables.insert with a view definition.
	viewTableType = "VIEW"

	// materializedViewTableType is the Table.type value for
	// materialized views created via tables.insert with a
	// materializedView definition (see QueryMaterializedViewIT).
	materializedViewTableType = "MATERIALIZED_VIEW"

	// externalTableType is the Table.type value for GCS-backed
	// external tables (tables.insert with externalDataConfiguration).
	externalTableType = "EXTERNAL"

	// snapshotTableType is the Table.type value for table snapshots
	// created via configuration.copy jobs with operationType=SNAPSHOT.
	snapshotTableType = "SNAPSHOT"
)
// tableIDFromPath reads the projectId, datasetId and tableId path
// values captured by the route pattern. The tableId is passed through
// splitColonOp and only its first result is kept, which drops any
// AIP-136 custom-method suffix (e.g. ":getIamPolicy") so the helper
// can also serve TableCustomMethodPOST.
func tableIDFromPath(r *http.Request) (projectID, datasetID, tableID string) {
	tableID, _ = splitColonOp(r.PathValue("tableId"))
	return r.PathValue("projectId"), r.PathValue("datasetId"), tableID
}
// tableResource finalizes a Table for a successful response. Whatever
// the caller supplied (Schema, FriendlyName, Description, ...) is kept;
// the identity fields (Kind, ID, TableReference) are always
// overwritten, and Type, CreationTime, LastModifiedTime, Labels and
// Location are filled in only when empty.
//
// Labels is materialized to an empty map when nil so the upstream
// `getTableLabels` sample's `Object.entries(table.metadata.labels)`
// call returns an empty iterator instead of erroring with
// `TypeError: Cannot convert undefined or null to object`. The
// bqtypes.Table.Labels tag omits `omitempty` so the empty map
// round-trips as `"labels":{}` on the wire. Mirrors datasetResource.
func tableResource(projectID, datasetID, tableID string, t bqtypes.Table) bqtypes.Table {
	// Defaults for fields the caller left unset.
	if t.Type == "" {
		t.Type = defaultTableType
	}
	if t.Location == "" {
		t.Location = "US"
	}
	if t.Labels == nil {
		t.Labels = bqtypes.ResourceLabels{}
	}
	if t.CreationTime == "" {
		t.CreationTime = nowMillis()
	}
	// A table that was never modified reports its creation time.
	if t.LastModifiedTime == "" {
		t.LastModifiedTime = t.CreationTime
	}

	// Identity fields are always derived from the request path.
	t.Kind = tableKind
	t.ID = fmt.Sprintf("%s:%s.%s", projectID, datasetID, tableID)
	t.TableReference = bqtypes.TableReference{
		ProjectID: projectID,
		DatasetID: datasetID,
		TableID:   tableID,
	}

	applyTableStorageStats(&t)
	return t
}
// TableList implements `bigquery.tables.list`:
//
//	GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables
//
// Without a Catalog it answers with an empty tableList envelope.
// Otherwise it calls Catalog.ListTables and turns every returned
// reference into a list entry via tableListItem, all in a single page
// (no `nextPageToken` today, mirroring DatasetList). RPC failures are
// translated by grpcToHTTPError.
//
// Per-entry shape matches upstream's tableList item: kind, id
// (projectId:datasetId.tableId), tableReference, type (defaulting to
// "TABLE"), and a labels object so node samples that call
// `Object.entries(item.metadata.labels)` on each item do not raise.
func TableList(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if deps.Catalog == nil {
			writeJSON(w, http.StatusOK, map[string]any{
				resourceKeyKind:       tableListKind,
				resourceKeyTables:     []bqtypes.Table{},
				resourceKeyTotalItems: 0,
			})
			return
		}

		ctx := r.Context()
		dataset := &enginepb.DatasetRef{
			ProjectId: r.PathValue("projectId"),
			DatasetId: r.PathValue("datasetId"),
		}
		listing, err := deps.Catalog.ListTables(ctx, &enginepb.ListTablesRequest{Dataset: dataset})
		if grpcToHTTPError(w, err) {
			return
		}

		refs := listing.GetTables()
		entries := make([]map[string]any, len(refs))
		for i, ref := range refs {
			entries[i] = tableListItem(ctx, deps, ref)
		}
		writeJSON(w, http.StatusOK, map[string]any{
			resourceKeyKind:       tableListKind,
			resourceKeyTables:     entries,
			resourceKeyTotalItems: len(entries),
		})
	}
}
// TableInsert implements `bigquery.tables.insert`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables
//
// The handler decodes the Table body, requires
// tableReference.tableId, and answers NotImplemented when no Catalog
// is configured. It then runs, in order: materialized-view schema
// inference, view schema inference, the unsupported-posture check and
// engine registration. Each of those steps writes its own error
// response, so the handler simply stops at the first one that fails.
// On success the new Table resource is written.
func TableInsert(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID := r.PathValue("projectId"), r.PathValue("datasetId")
		body, decoded := decodeTableBody(w, r)
		if !decoded {
			return
		}
		tableID := body.TableReference.TableID
		switch {
		case tableID == "":
			writeError(w, http.StatusBadRequest, "invalid",
				"tableReference.tableId is required")
			return
		case deps.Catalog == nil:
			NotImplemented(w, r)
			return
		}

		// Short-circuit evaluation keeps the steps strictly ordered
		// and skips the rest once one has already responded.
		failed := !populateMaterializedViewSchema(w, deps, r, projectID, &body) ||
			!populateViewSchema(w, deps, r, projectID, &body) ||
			rejectUnsupportedTablePosture(w, &body) ||
			!registerInsertedTable(w, r, deps, projectID, datasetID, tableID, &body)
		if failed {
			return
		}
		writeInsertedTableResponse(w, deps, r, projectID, datasetID, tableID, body)
	}
}
// populateMaterializedViewSchema prepares a REST materialized-view
// insert. When the body carries a non-empty materializedView.query it
// defaults Type to MATERIALIZED_VIEW and, if the client sent no schema
// fields, infers the schema by dry-running the query so that SELECT *
// expands to analyzed columns instead of zero. Bodies without an MV
// query are left untouched.
//
// It returns false only when inference failed, in which case an error
// response has already been written.
func populateMaterializedViewSchema(
	w http.ResponseWriter,
	deps Dependencies,
	r *http.Request,
	projectID string,
	t *bqtypes.Table,
) bool {
	mv := t.MaterializedView
	if mv == nil || mv.Query == "" {
		return true
	}
	if t.Type == "" {
		t.Type = materializedViewTableType
	}
	if t.Schema != nil && len(t.Schema.Fields) > 0 {
		return true
	}

	inferred, err := inferTableSchemaFromQuery(deps, r, projectID, mv.Query)
	switch {
	case err == nil:
		if inferred != nil {
			t.Schema = inferred
		}
		return true
	case queryGRPCToHTTPError(w, err):
		// The gRPC error was already translated into a response.
		return false
	default:
		writeError(w, http.StatusInternalServerError, reasonInternalError,
			"Could not infer materialized view schema: "+err.Error())
		return false
	}
}
// populateViewSchema prepares a REST logical-view insert. When the
// body carries a non-empty view.query it defaults Type to VIEW and, if
// the client sent no schema fields, infers the schema by dry-running
// the query so that SELECT * expands to analyzed columns instead of
// zero. Bodies without a view query are left untouched.
//
// It returns false only when inference failed, in which case an error
// response has already been written.
func populateViewSchema(
	w http.ResponseWriter,
	deps Dependencies,
	r *http.Request,
	projectID string,
	t *bqtypes.Table,
) bool {
	view := t.View
	if view == nil || view.Query == "" {
		return true
	}
	if t.Type == "" {
		t.Type = viewTableType
	}
	if t.Schema != nil && len(t.Schema.Fields) > 0 {
		return true
	}

	inferred, err := inferTableSchemaFromQuery(deps, r, projectID, view.Query)
	switch {
	case err == nil:
		if inferred != nil {
			t.Schema = inferred
		}
		return true
	case queryGRPCToHTTPError(w, err):
		// The gRPC error was already translated into a response.
		return false
	default:
		writeError(w, http.StatusInternalServerError, reasonInternalError,
			"Could not infer view schema: "+err.Error())
		return false
	}
}
// insertLogicalView registers a REST-created logical view in the
// engine by executing a `CREATE OR REPLACE VIEW` statement, the same
// path `CREATE VIEW` DDL takes. That lands the view in the engine's
// view registry so a later `SELECT ... FROM <view>` has its stored
// definition inlined at analyze time and returns the base rows. The
// alternative (registering an empty backing table) shadows the view in
// the engine catalog and makes reads return nothing.
//
// Each name component is backtick-quoted on its own so project IDs
// with hyphens (and other names that are not bare identifiers) resolve
// as a three-part `project.dataset.view` path rather than a single
// dotted identifier.
//
// NOTE(review): the components are interpolated as-is; a name that
// itself contains a backtick would end the quoted identifier early.
// Confirm whether callers validate names before this point.
//
// Returns false (after writing an HTTP error) when the query client is
// missing, the engine returns no stream, or execution fails.
func insertLogicalView(
	w http.ResponseWriter,
	r *http.Request,
	deps Dependencies,
	projectID, datasetID, tableID, viewQuery string,
) bool {
	if deps.Query == nil {
		writeError(w, http.StatusNotImplemented, reasonInternalError,
			"engine query client is not configured for view registration")
		return false
	}

	statement := fmt.Sprintf("CREATE OR REPLACE VIEW `%s`.`%s`.`%s` AS\n%s",
		projectID, datasetID, tableID, viewQuery)
	request := &enginepb.QueryRequest{
		ProjectId: projectID,
		Sql:       statement,
	}
	stream, err := deps.Query.ExecuteQuery(r.Context(), request)
	switch {
	case err != nil:
		// Reported below.
	case stream == nil:
		err = errors.New("engine returned no result stream for view registration")
	default:
		// Only the terminal error of the drained stream matters here.
		_, _, _, _, _, _, err = drainSyncStream(stream)
	}
	if err == nil {
		return true
	}

	if !queryGRPCToHTTPError(w, err) {
		writeError(w, http.StatusInternalServerError, reasonInternalError,
			"Could not register view: "+err.Error())
	}
	return false
}
// inferTableSchemaFromQuery sends a definition query to the engine's
// DryRun RPC and converts the schema on the response into a REST
// TableSchema via schemaFromProto. It returns (nil, nil) when the
// Query client is nil or sql is empty so callers can still register a
// schema-less table; a DryRun error is returned unchanged.
func inferTableSchemaFromQuery(deps Dependencies, r *http.Request,
	projectID, sql string,
) (*bqtypes.TableSchema, error) {
	if sql == "" || deps.Query == nil {
		return nil, nil
	}
	request := &enginepb.QueryRequest{
		ProjectId: projectID,
		Sql:       sql,
	}
	analyzed, err := deps.Query.DryRun(r.Context(), request)
	if err != nil {
		return nil, err
	}
	return schemaFromProto(analyzed.GetSchema()), nil
}
// tableFromDescribeResponse converts a Catalog.DescribeTable payload
// into the REST Table shape: the schema (normalized to REST type
// names), the table type, and a view definition when the response
// carries a non-empty view query.
func tableFromDescribeResponse(resp *enginepb.DescribeTableResponse) bqtypes.Table {
	var t bqtypes.Table
	t.Schema = normalizeRESTTableSchema(schemaFromProto(resp.GetSchema()))
	// An empty table type leaves Type at its zero value.
	t.Type = resp.GetTableType()
	if query := resp.GetViewQuery(); query != "" {
		t.View = &bqtypes.ViewDefinition{
			Query:        query,
			UseLegacySQL: resp.GetViewUseLegacySql(),
		}
	}
	return t
}
// TableGet implements `bigquery.tables.get`:
//
//	GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// Without a Catalog the handler returns a bare Table resource for the
// requested path. Otherwise the table is resolved through
// Catalog.DescribeTable and combined with the metadata overlay by
// catalogTable.
//
// When DescribeTable fails, the REST metadata overlay recorded at
// tables.insert is consulted before giving up: a logical view has no
// backing storage table, so the engine reports it as missing, yet a
// `create_table(view)` + `get_table` round-trip must keep working (the
// view rows still come from the query path, which inlines the
// registered definition). Only overlays that carry a view or
// materialized-view definition are served this way; anything else is
// reported through grpcToHTTPError.
func TableGet(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID := tableIDFromPath(r)
		send := func(t bqtypes.Table) {
			writeJSON(w, http.StatusOK, tableResource(projectID, datasetID, tableID, t))
		}

		if deps.Catalog == nil {
			send(bqtypes.Table{})
			return
		}

		ref := &enginepb.TableRef{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		}
		described, err := deps.Catalog.DescribeTable(r.Context(),
			&enginepb.DescribeTableRequest{Table: ref})
		if err == nil {
			send(catalogTable(r.Context(), deps, projectID, datasetID, tableID, described))
			return
		}

		overlay, found := deps.Metadata.GetTable(projectID, datasetID, tableID)
		if found && (overlay.View != nil || overlay.MaterializedView != nil) {
			send(overlay)
			return
		}
		grpcToHTTPError(w, err)
	}
}
// TableUpdate implements `bigquery.tables.update`:
//
//	PUT /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// Full replacement of the Table metadata. The engine has no update RPC
// yet, so the handler echoes the request body back as the canonical
// resource (stamped by tableResource). The body is also stored in the
// in-memory MetadataStore so a follow-up GET returns the REST-only
// fields (labels, expirationTime, rangePartitioning, ...) rather than
// the engine-only schema view, and column governance is synced from
// the submitted schema.
func TableUpdate(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID := tableIDFromPath(r)
		replacement, decoded := decodeTableBody(w, r)
		// Both helpers write their own error response on failure.
		if !decoded || rejectUnsupportedTablePosture(w, &replacement) {
			return
		}
		deps.Metadata.PutTable(projectID, datasetID, tableID, replacement)
		SyncColumnGovernanceFromSchema(
			r.Context(), deps, projectID, datasetID, tableID, replacement.Schema)
		writeJSON(w, http.StatusOK,
			tableResource(projectID, datasetID, tableID, replacement))
	}
}
// TablePatch implements `bigquery.tables.patch`:
//
//	PATCH /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// Sparse update; mirrors TableUpdate's metadata-stash posture so
// upstream `setMetadata` + `getMetadata` sequences roundtrip the
// REST-only fields. The engine has no true patch RPC yet.
//
// The patch is merged into the MetadataStore first, then any schema
// change is pushed to the engine (a failure there answers 400) and
// column governance is synced. The response is the merged overlay when
// no Catalog is configured, or the freshly described catalog table
// otherwise. If the patch touched labels and the result has none, the
// response is flagged to omit the empty labels object on the wire.
func TablePatch(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID := tableIDFromPath(r)
		patch, decoded := decodeTableBody(w, r)
		if !decoded {
			return
		}
		if rejectUnsupportedTablePosture(w, &patch) {
			return
		}

		ctx := r.Context()
		deps.Metadata.MergeTable(projectID, datasetID, tableID, patch)
		err := syncPatchedTableSchema(ctx, deps, projectID, datasetID, tableID, patch.Schema)
		if err != nil {
			writeError(w, http.StatusBadRequest, reasonInvalid, err.Error())
			return
		}
		SyncColumnGovernanceFromSchema(ctx, deps, projectID, datasetID, tableID, patch.Schema)

		// respond applies the labels-on-wire rule shared by both
		// branches below and writes the final resource.
		respond := func(result bqtypes.Table) {
			if patch.LabelsPatchPresent() && len(result.Labels) == 0 {
				result.SetOmitEmptyLabelsOnWire(true)
			}
			writeJSON(w, http.StatusOK, tableResource(projectID, datasetID, tableID, result))
		}

		if deps.Catalog == nil {
			result := patch
			if merged, found := deps.Metadata.GetTable(projectID, datasetID, tableID); found {
				result = merged
			}
			respond(result)
			return
		}

		described, err := deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{
			Table: &enginepb.TableRef{
				ProjectId: projectID,
				DatasetId: datasetID,
				TableId:   tableID,
			},
		})
		if err != nil {
			grpcToHTTPError(w, err)
			return
		}
		respond(catalogTable(ctx, deps, projectID, datasetID, tableID, described))
	}
}
// TableDelete implements `bigquery.tables.delete`:
//
//	DELETE /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// Answers NotImplemented without a Catalog. Otherwise it lets the
// snapshot store (when configured) capture the table, drops the table
// through Catalog.DropTable, removes its metadata overlay and replies
// with an empty JSON object. The overlay is only removed once the drop
// has succeeded.
func TableDelete(deps Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		projectID, datasetID, tableID := tableIDFromPath(r)
		if deps.Catalog == nil {
			NotImplemented(w, r)
			return
		}

		ctx := r.Context()
		if deps.Snapshots != nil {
			// The capture result is discarded: a failed capture
			// does not block the delete.
			_ = deps.Snapshots.CaptureBeforeDelete(ctx, deps.Catalog,
				projectID, datasetID, tableID)
		}

		target := &enginepb.TableRef{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		}
		if _, err := deps.Catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: target}); grpcToHTTPError(w, err) {
			return
		}
		deps.Metadata.DeleteTable(projectID, datasetID, tableID)
		writeJSON(w, http.StatusOK, struct{}{})
	}
}
// localStubIamPolicyEtag is the deterministic etag returned by the
// emulator's metadata-only table IAM stub (no real ACL store).
const localStubIamPolicyEtag = "BwWWja0YfJA="

// localStubEmptyIamPolicy returns a freshly allocated policy document
// with version 1, an empty (non-nil) bindings list and the stub etag.
func localStubEmptyIamPolicy() map[string]any {
	policy := make(map[string]any, 3)
	policy["version"] = 1
	policy["bindings"] = []any{}
	policy["etag"] = localStubIamPolicyEtag
	return policy
}
// TableGetIamPolicy implements `bigquery.tables.getIamPolicy`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}:getIamPolicy
//
// Reached via TableCustomMethodPOST after parsing the trailing :op.
// The request is not inspected; every call returns the empty stub
// policy.
func TableGetIamPolicy(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, _ *http.Request) {
		policy := localStubEmptyIamPolicy()
		writeJSON(w, http.StatusOK, policy)
	}
}
// TableSetIamPolicy implements `bigquery.tables.setIamPolicy`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}:setIamPolicy
//
// Reached via TableCustomMethodPOST after parsing the trailing :op.
// The submitted policy is echoed back rather than stored: a missing
// policy yields the empty stub policy, and a supplied one gets
// `bindings` and `etag` filled in when absent.
func TableSetIamPolicy(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		var body struct {
			Policy map[string]any `json:"policy"`
		}
		// The decode error is dropped on purpose: an unreadable body
		// is handled like one without a policy (or with whatever part
		// of it was decoded).
		_ = json.NewDecoder(r.Body).Decode(&body)

		policy := body.Policy
		if policy == nil {
			writeJSON(w, http.StatusOK, localStubEmptyIamPolicy())
			return
		}
		if _, present := policy["bindings"]; !present {
			policy["bindings"] = []any{}
		}
		if _, present := policy["etag"]; !present {
			policy["etag"] = localStubIamPolicyEtag
		}
		writeJSON(w, http.StatusOK, policy)
	}
}
// TableTestIamPermissions implements `bigquery.tables.testIamPermissions`:
//
//	POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}:testIamPermissions
//
// Reached via TableCustomMethodPOST after parsing the trailing :op.
// Every request is delegated to NotImplemented.
func TableTestIamPermissions(_ Dependencies) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		NotImplemented(w, r)
	}
}
// TableCustomMethodPOST dispatches the AIP-136 custom-method POST
// endpoints registered against `/tables/{tableId}` -- the three IAM
// helpers BigQuery exposes for table resources. The operation is the
// part of the tableId path value that splitColonOp returns as its
// second result. A plain POST without an operation answers 405; an
// unrecognized operation answers 404.
func TableCustomMethodPOST(deps Dependencies) http.HandlerFunc {
	// The sub-handlers are built once, when the route is registered.
	byOp := map[string]http.HandlerFunc{
		"getIamPolicy":       TableGetIamPolicy(deps),
		"setIamPolicy":       TableSetIamPolicy(deps),
		"testIamPermissions": TableTestIamPermissions(deps),
	}
	return func(w http.ResponseWriter, r *http.Request) {
		_, op := splitColonOp(r.PathValue("tableId"))
		if handle, known := byOp[op]; known {
			handle(w, r)
			return
		}
		if op == "" {
			writeError(w, http.StatusMethodNotAllowed, "invalid",
				"POST is not allowed on a table resource. "+
					"Use POST /tables to create, /insertAll to stream rows, "+
					"or a documented :op IAM custom method.")
			return
		}
		writeError(w, http.StatusNotFound, "notFound",
			"Unknown table custom method ':"+op+"'.")
	}
}
package handlers
import (
"context"
"strconv"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/load"
)
// tableListItem builds one tables.list entry for a reference returned
// by Catalog.ListTables. Labels and type come from the metadata
// overlay when it has them; otherwise the type falls back to the
// reference's own table type and finally to "TABLE". The view (or
// materialized view) query is taken from the overlay first and, when
// the overlay supplied neither, looked up through the catalog.
func tableListItem(ctx context.Context, deps Dependencies, ref *enginepb.TableRef) map[string]any {
	projectID, datasetID, tableID := ref.GetProjectId(), ref.GetDatasetId(), ref.GetTableId()
	overlay, hasOverlay := deps.Metadata.GetTable(projectID, datasetID, tableID)

	// Always emit a labels object, even when there are no labels.
	labels := bqtypes.ResourceLabels{}
	if hasOverlay && overlay.Labels != nil {
		labels = overlay.Labels
	}

	tableType := defaultTableType
	switch {
	case hasOverlay && overlay.Type != "":
		tableType = overlay.Type
	case ref.GetTableType() != "":
		tableType = ref.GetTableType()
	}

	item := map[string]any{
		"kind": tableKind,
		"id":   projectID + ":" + datasetID + "." + tableID,
		"tableReference": bqtypes.TableReference{
			ProjectID: projectID,
			DatasetID: datasetID,
			TableID:   tableID,
		},
		"type":   tableType,
		"labels": labels,
	}

	if hasOverlay {
		mergeListViewQueryFromOverlay(overlay, item)
	}
	_, hasView := item["view"]
	_, hasMV := item["materializedView"]
	if !hasView && !hasMV {
		mergeListViewQueryFromCatalog(ctx, deps, ref, tableType, item)
	}
	return item
}
// mergeListViewQueryFromOverlay copies the view and/or materialized
// view query stored on the metadata overlay into the list item, under
// the "view" and "materializedView" keys respectively. Definitions
// that are absent or have an empty query add nothing.
func mergeListViewQueryFromOverlay(overlay bqtypes.Table, item map[string]any) {
	if view := overlay.View; view != nil && view.Query != "" {
		item["view"] = map[string]any{discoveryMethodQuery: view.Query}
	}
	if mv := overlay.MaterializedView; mv != nil && mv.Query != "" {
		item["materializedView"] = map[string]any{discoveryMethodQuery: mv.Query}
	}
}
// catalogTable assembles the REST Table for a successful DescribeTable
// response: the engine schema and type, overlaid with the dataset
// location and the table's metadata overlay, with the default
// collation applied to the schema, the creation time recorded by the
// snapshot store (only if none is set yet) and the row count obtained
// from Catalog.ListRows. TableGet and TablePatch both build their
// responses through it.
func catalogTable(
	ctx context.Context,
	deps Dependencies,
	projectID, datasetID, tableID string,
	resp *enginepb.DescribeTableResponse,
) bqtypes.Table {
	t := tableFromDescribeResponse(resp)

	if dataset, found := deps.Metadata.GetDataset(projectID, datasetID); found && dataset.Location != "" {
		t.Location = dataset.Location
	}
	if stored, found := deps.Metadata.GetTable(projectID, datasetID, tableID); found {
		t = applyTableMetadataOverlay(t, stored)
	}
	if collation := t.DefaultCollation; collation != "" {
		t.Schema = bqtypes.ApplyDefaultCollationToStringFields(t.Schema, collation)
	}

	if deps.Snapshots != nil {
		createdMs, recorded := deps.Snapshots.CreationTimeMs(projectID, datasetID, tableID)
		if recorded && t.CreationTime == "" {
			t.CreationTime = strconv.FormatInt(createdMs, 10)
		}
	}

	if deps.Catalog != nil {
		// A zero-sized page is enough to learn the total row count.
		counts, err := deps.Catalog.ListRows(ctx, &enginepb.ListRowsRequest{
			Table: &enginepb.TableRef{
				ProjectId: projectID,
				DatasetId: datasetID,
				TableId:   tableID,
			},
			StartIndex: 0,
			MaxResults: 0,
		})
		switch {
		case err == nil:
			t.NumRows = strconv.FormatInt(counts.GetTotalRows(), 10)
		case t.NumRows == "":
			// Counting failed and nothing else supplied a value.
			t.NumRows = "0"
		}
	}

	applyTableStorageStats(&t)
	return t
}
// mergeListViewQueryFromCatalog attaches view.query (or
// materializedView.query) from Catalog.DescribeTable when the metadata
// overlay did not stash DDL text, e.g. three-segment backtick CREATE
// VIEW forms the gateway parser does not mirror into overlay.
//
// The lookup only happens for entries whose resolved type or
// reference type is VIEW or MATERIALIZED_VIEW. A missing Catalog, a
// DescribeTable error or an empty view query all leave the item
// unchanged. The materialized-view key wins when both type checks
// match.
func mergeListViewQueryFromCatalog(
	ctx context.Context,
	deps Dependencies,
	ref *enginepb.TableRef,
	tableType string,
	item map[string]any,
) {
	if deps.Catalog == nil {
		return
	}
	refType := ref.GetTableType()
	isMV := tableType == materializedViewTableType || refType == materializedViewTableType
	isView := tableType == viewTableType || refType == viewTableType
	if !(isView || isMV) {
		return
	}

	described, err := deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{
		Table: &enginepb.TableRef{
			ProjectId: ref.GetProjectId(),
			DatasetId: ref.GetDatasetId(),
			TableId:   ref.GetTableId(),
		},
	})
	if err != nil {
		return
	}
	query := described.GetViewQuery()
	if query == "" {
		return
	}

	key := "view"
	if isMV {
		key = "materializedView"
	}
	item[key] = map[string]any{discoveryMethodQuery: query}
}
// applyTableStorageStats fills output-only byte counters so the console
// Details tab shows explicit zeros instead of em dashes. Every counter
// that is still empty is set to "0"; counters that already hold a
// value are left alone. The byte breakdowns are stubbed this way until
// the engine exposes storage statistics RPCs. NumRows is not touched
// here.
func applyTableStorageStats(t *bqtypes.Table) {
	counters := [...]*string{
		&t.NumBytes,
		&t.NumLongTermBytes,
		&t.NumActiveLogicalBytes,
		&t.NumTotalLogicalBytes,
		&t.NumCurrentPhysicalBytes,
		&t.NumPhysicalBytes,
		&t.NumActivePhysicalBytes,
		&t.NumLongTermPhysicalBytes,
		&t.NumTimeTravelPhysicalBytes,
	}
	for _, counter := range counters {
		if *counter == "" {
			*counter = "0"
		}
	}
}
// syncPatchedTableSchema registers schema fields added via tables.patch
// (setMetadata) so tables.get returns engine-backed column types instead
// of overlay-only stubs.
//
// It is a no-op without a Catalog or when the patch carries no schema
// fields. Otherwise the engine's current schema is fetched, merged
// with the patch by load.MergeSchemasForTablePatch and, only if the
// merge reports a change, applied through load.ApplySchemaUpdate.
// Errors from any of those steps are returned unchanged.
func syncPatchedTableSchema(
	ctx context.Context,
	deps Dependencies,
	projectID, datasetID, tableID string,
	patchSchema *bqtypes.TableSchema,
) error {
	if deps.Catalog == nil {
		return nil
	}
	if patchSchema == nil || len(patchSchema.Fields) == 0 {
		return nil
	}

	target := &enginepb.TableRef{
		ProjectId: projectID,
		DatasetId: datasetID,
		TableId:   tableID,
	}
	described, err := deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: target})
	if err != nil {
		return err
	}

	current := schemaFromProto(described.GetSchema())
	merged, changed, err := load.MergeSchemasForTablePatch(current, patchSchema)
	if err != nil || !changed {
		// err is nil here when the merge succeeded without changes.
		return err
	}
	_, err = load.ApplySchemaUpdate(ctx, deps.Catalog, target, merged, load.TablePatchSchemaOptions)
	return err
}
package handlers
import (
"net/http"
"strconv"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// registerInsertedTable hands the decoded Table to the engine, picking
// one of three paths: the view registry for bodies with a view query,
// the external-table path for bodies with an
// externalDataConfiguration, or a plain Catalog.RegisterTable call.
// Returns false when an error response has already been written.
func registerInsertedTable(
	w http.ResponseWriter,
	r *http.Request,
	deps Dependencies,
	projectID, datasetID, tableID string,
	t *bqtypes.Table,
) bool {
	// A logical view must be registered in the engine's view registry
	// (the same path CREATE VIEW DDL takes) so reads inline its stored
	// definition. Registering an empty backing table instead, as the
	// generic path at the bottom does, shadows the view in the engine
	// catalog (FindTable resolves storage before the view registry),
	// so SELECT ... FROM <view> silently returns zero rows. This is
	// the REST-API analogue of the CREATE-VIEW-on-read fix.
	if view := t.View; view != nil && view.Query != "" {
		return insertLogicalView(w, r, deps, projectID, datasetID, tableID, view.Query)
	}
	if t.ExternalDataConfiguration != nil {
		return insertExternalTable(w, r, deps, projectID, datasetID, tableID, t)
	}

	request := &enginepb.RegisterTableRequest{
		Table: &enginepb.TableRef{
			ProjectId: projectID,
			DatasetId: datasetID,
			TableId:   tableID,
		},
		Schema: schemaToProto(t.Schema),
	}
	_, err := deps.Catalog.RegisterTable(r.Context(), request)
	return !grpcToHTTPError(w, err)
}
// writeInsertedTableResponse finishes a successful tables.insert: it
// applies the table's default collation to the schema, stores the
// table in the metadata overlay, syncs column governance, records the
// creation time with the snapshot store (when configured) and writes
// the Table resource.
//
// The timestamp recorded with the snapshot store is also the one
// reported as creationTime in the response (unless the body already
// carried one), so the insert response and the recorded creation time
// cannot drift apart. The overlay is stored before the timestamp is
// stamped and therefore does not carry it.
func writeInsertedTableResponse(
	w http.ResponseWriter,
	deps Dependencies,
	r *http.Request,
	projectID, datasetID, tableID string,
	t bqtypes.Table,
) {
	// Apply the table-level default collation once, before persisting,
	// so the stored overlay and the response carry the same schema.
	if t.DefaultCollation != "" {
		t.Schema = bqtypes.ApplyDefaultCollationToStringFields(t.Schema, t.DefaultCollation)
	}
	deps.Metadata.PutTable(projectID, datasetID, tableID, t)
	SyncColumnGovernanceFromSchema(r.Context(), deps, projectID, datasetID, tableID, t.Schema)

	created := nowMillis()
	if deps.Snapshots != nil {
		// nowMillis yields a decimal string; the snapshot store wants
		// the numeric value. An unparsable value skips the recording.
		if ms, err := strconv.ParseInt(created, 10, 64); err == nil {
			deps.Snapshots.RecordCreation(projectID, datasetID, tableID, ms)
		}
	}
	// t is a by-value copy, so stamping it only affects the response.
	if t.CreationTime == "" {
		t.CreationTime = created
	}
	writeJSON(w, http.StatusOK, tableResource(projectID, datasetID, tableID, t))
}
package handlers
import (
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// schemaToProto converts a REST TableSchema into the gRPC TableSchema
// the engine accepts, converting each field with fieldToProto. A nil
// REST schema yields nil so the proto's default zero-value gets sent
// on the wire; a non-nil schema always yields a non-nil field list.
func schemaToProto(s *bqtypes.TableSchema) *enginepb.TableSchema {
	if s == nil {
		return nil
	}
	converted := make([]*enginepb.FieldSchema, len(s.Fields))
	for i := range s.Fields {
		converted[i] = fieldToProto(s.Fields[i])
	}
	return &enginepb.TableSchema{Fields: converted}
}
// fieldToProto recursively converts a REST TableFieldSchema into the
// gRPC FieldSchema. Name, type, mode and description are copied
// verbatim and nested STRUCT/RECORD fields are converted the same
// way. A field without sub-fields keeps a nil Fields list.
func fieldToProto(f bqtypes.TableFieldSchema) *enginepb.FieldSchema {
	var children []*enginepb.FieldSchema
	for i := range f.Fields {
		children = append(children, fieldToProto(f.Fields[i]))
	}
	return &enginepb.FieldSchema{
		Name:        f.Name,
		Type:        f.Type,
		Mode:        f.Mode,
		Description: f.Description,
		Fields:      children,
	}
}
// schemaFromProto is the inverse of schemaToProto: it turns a gRPC
// TableSchema into the REST TableSchema, converting each field with
// fieldFromProto. An absent schema, or one without fields, yields nil
// so the JSON response omits the field.
func schemaFromProto(s *enginepb.TableSchema) *bqtypes.TableSchema {
	protoFields := s.GetFields() // nil-safe: an absent schema has no fields
	if len(protoFields) == 0 {
		return nil
	}
	fields := make([]bqtypes.TableFieldSchema, len(protoFields))
	for i, field := range protoFields {
		fields[i] = fieldFromProto(field)
	}
	return &bqtypes.TableSchema{Fields: fields}
}
// fieldFromProto recursively converts a gRPC FieldSchema into the REST
// TableFieldSchema. The type goes through normalizeRESTFieldType and
// any spelling of STRUCT is then reported as RECORD; the name goes
// through normalizeRESTFieldName. Mode and description are copied
// verbatim, and a field without sub-fields keeps a nil Fields list.
func fieldFromProto(f *enginepb.FieldSchema) bqtypes.TableFieldSchema {
	restType := normalizeRESTFieldType(f.GetType())
	if strings.EqualFold(restType, "STRUCT") {
		restType = "RECORD"
	}

	var nested []bqtypes.TableFieldSchema
	for _, child := range f.GetFields() {
		nested = append(nested, fieldFromProto(child))
	}

	return bqtypes.TableFieldSchema{
		Name:        normalizeRESTFieldName(f.GetName()),
		Type:        restType,
		Mode:        f.GetMode(),
		Description: f.GetDescription(),
		Fields:      nested,
	}
}
// normalizeRESTFieldName maps analyzer-synthesized column names
// ($col1, $col2, ...) to the f0_, f1_, ... aliases the Node client
// expects for anonymous SELECT outputs (the queryParamsTimestamps
// sample reads row.f0_). The numbering shifts from 1-based to 0-based.
// Any other name, including "$col" followed by something that is not a
// positive integer, is returned unchanged.
func normalizeRESTFieldName(name string) string {
	ordinal, synthesized := strings.CutPrefix(name, "$col")
	if !synthesized {
		return name
	}
	// An empty or non-numeric ordinal fails Atoi and falls through.
	n, err := strconv.Atoi(ordinal)
	if err != nil || n <= 0 {
		return name
	}
	return "f" + strconv.Itoa(n-1) + "_"
}
// normalizeRESTFieldType rewrites three SQL type names to the names
// used on the REST surface: the INT64 constant maps to the INTEGER
// constant, FLOAT64 to FLOAT and BOOL to BOOLEAN. Matching ignores
// case and surrounding whitespace; every other input is returned
// exactly as given (not trimmed or upper-cased).
func normalizeRESTFieldType(t string) string {
	canonical := strings.ToUpper(strings.TrimSpace(t))
	if canonical == sqlTypeINT64 {
		return sqlTypeINTEGER
	}
	if canonical == "FLOAT64" {
		return "FLOAT"
	}
	if canonical == "BOOL" {
		return "BOOLEAN"
	}
	return t
}
// normalizeRESTTableSchema returns a copy of the schema in which every
// field type, at every nesting level, has been passed through
// normalizeRESTFieldType. The input schema and its field slices are
// not modified. A nil schema yields nil.
func normalizeRESTTableSchema(s *bqtypes.TableSchema) *bqtypes.TableSchema {
	if s == nil {
		return nil
	}
	normalized := *s
	normalized.Fields = make([]bqtypes.TableFieldSchema, len(s.Fields))
	for i := range s.Fields {
		field := s.Fields[i]
		field.Type = normalizeRESTFieldType(field.Type)
		if len(field.Fields) > 0 {
			// Recurse by wrapping the sub-fields in a schema of their own.
			sub := normalizeRESTTableSchema(&bqtypes.TableSchema{Fields: field.Fields})
			if sub != nil {
				field.Fields = sub.Fields
			}
		}
		normalized.Fields[i] = field
	}
	return &normalized
}
package handlers
// persistViewFromDDL records a view parsed from CREATE [OR REPLACE]
// VIEW / CREATE [OR REPLACE] MATERIALIZED VIEW DDL in the gateway
// MetadataStore so tables.list / tables.get surface type and
// view.query (or materializedView.query) for query-job-created views.
// SQL that parseCreateViewDDL does not recognize is ignored.
func persistViewFromDDL(
	deps *Dependencies,
	projectID, defaultDatasetID, sql string,
) {
	parsed, recognized := parseCreateViewDDL(projectID, defaultDatasetID, sql)
	if recognized {
		target := parsed.TableReference
		deps.Metadata.PutTable(target.ProjectID, target.DatasetID, target.TableID, parsed)
	}
}
// evictViewFromDDL removes view metadata stashed by persistViewFromDDL
// after DROP VIEW / DROP MATERIALIZED VIEW DDL. DROP VIEW surfaces as
// statementType DROP_TABLE in the engine envelope; parseDropViewDDL
// distinguishes it from DROP TABLE. SQL that parseDropViewDDL does not
// recognize is ignored.
func evictViewFromDDL(
	deps *Dependencies,
	projectID, defaultDatasetID, sql string,
	materializedOnly bool,
) {
	if pID, dID, tID, recognized := parseDropViewDDL(projectID, defaultDatasetID, sql, materializedOnly); recognized {
		deps.Metadata.DeleteTable(pID, dID, tID)
	}
}
// handleViewDDLAfterQuery keeps view metadata in step with DDL that ran
// through jobs.query / jobs.insert query jobs, mirroring the
// routines/models DDL persistence. CREATE_VIEW and
// CREATE_MATERIALIZED_VIEW persist the view; DROP_MATERIALIZED_VIEW
// evicts a materialized view; DROP_TABLE evicts a logical view when the
// SQL turns out to be a DROP VIEW. Every other statement type is a
// no-op.
func handleViewDDLAfterQuery(
	deps *Dependencies,
	projectID, defaultDatasetID, sql, statementType string,
) {
	if statementType == "CREATE_VIEW" || statementType == "CREATE_MATERIALIZED_VIEW" {
		persistViewFromDDL(deps, projectID, defaultDatasetID, sql)
		return
	}
	dropsMaterialized := statementType == "DROP_MATERIALIZED_VIEW"
	if dropsMaterialized || statementType == "DROP_TABLE" {
		evictViewFromDDL(deps, projectID, defaultDatasetID, sql, dropsMaterialized)
	}
}
package handlers
import (
"strings"
"unicode"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// parseCreateViewDDL turns CREATE [OR REPLACE] [MATERIALIZED] VIEW DDL
// into a bqtypes.Table carrying the target reference and the AS-query.
// projectID and defaultDatasetID fill in whichever parts the view name
// leaves out (one- or two-part names). Logical views populate
// Table.View; materialized views populate Table.MaterializedView
// instead. The bool is false when sql is not a parseable view
// definition.
func parseCreateViewDDL(projectID, defaultDatasetID, sql string) (bqtypes.Table, bool) {
	afterHeader, isMaterialized, ok := stripCreateViewHeader(sql)
	if !ok {
		return bqtypes.Table{}, false
	}
	viewName, afterName, ok := parseViewQuotedName(afterHeader)
	if !ok {
		return bqtypes.Table{}, false
	}
	body, ok := parseViewQueryFromRest(afterName)
	if !ok {
		return bqtypes.Table{}, false
	}

	proj, ds, tbl := splitViewTableName(projectID, defaultDatasetID, viewName)
	view := bqtypes.Table{
		TableReference: bqtypes.TableReference{
			ProjectID: proj,
			DatasetID: ds,
			TableID:   tbl,
		},
	}
	if isMaterialized {
		view.Type = materializedViewTableType
		view.MaterializedView = &bqtypes.MaterializedViewDefinition{Query: body}
	} else {
		view.Type = viewTableType
		view.View = &bqtypes.ViewDefinition{Query: body}
	}
	return view, true
}
// parseDropViewDDL resolves the target of DROP [MATERIALIZED] VIEW
// [IF EXISTS] DDL into project / dataset / table ids. materializedOnly
// selects the flavour that is allowed to match: true accepts only
// materialized-view drops, false accepts only logical-view drops. ok is
// false for any other statement, or when no view name follows the
// header.
func parseDropViewDDL(
	projectID, defaultDatasetID, sql string,
	materializedOnly bool,
) (pID, dID, tID string, ok bool) {
	target, isMaterialized, matched := stripDropViewHeader(sql)
	// The statement's flavour has to equal the requested one.
	if !matched || isMaterialized != materializedOnly {
		return "", "", "", false
	}
	viewName, _, named := parseViewQuotedName(target)
	if !named {
		return "", "", "", false
	}
	pID, dID, tID = splitViewTableName(projectID, defaultDatasetID, viewName)
	return pID, dID, tID, true
}
// stripCreateViewHeader consumes the leading
// CREATE [OR REPLACE] [MATERIALIZED] VIEW [IF NOT EXISTS] keywords of
// sql and returns the trimmed remainder (starting at the view name).
// materialized reports whether the MATERIALIZED keyword was present.
// ok is false when sql is not a CREATE VIEW statement.
//
// Keywords match case-insensitively and may be separated by any run of
// whitespace (spaces, tabs, newlines). IF NOT EXISTS is consumed so the
// caller does not mistake "IF" for the view name.
func stripCreateViewHeader(sql string) (rest string, materialized bool, ok bool) {
	rest, ok = cutViewKeywords(sql, "CREATE")
	if !ok {
		return "", false, false
	}
	if tail, found := cutViewKeywords(rest, "OR", "REPLACE"); found {
		rest = tail
	}
	if tail, found := cutViewKeywords(rest, "MATERIALIZED"); found {
		rest, materialized = tail, true
	}
	if rest, ok = cutViewKeywords(rest, "VIEW"); !ok {
		return "", false, false
	}
	if tail, found := cutViewKeywords(rest, "IF", "NOT", "EXISTS"); found {
		rest = tail
	}
	return strings.TrimSpace(rest), materialized, true
}

// stripDropViewHeader consumes the leading
// DROP [MATERIALIZED] VIEW [IF EXISTS] keywords of sql and returns the
// trimmed remainder (starting at the view name). materialized reports
// whether the MATERIALIZED keyword was present. ok is false when sql is
// not a DROP VIEW statement (for example DROP TABLE).
func stripDropViewHeader(sql string) (rest string, materialized bool, ok bool) {
	rest, ok = cutViewKeywords(sql, "DROP")
	if !ok {
		return "", false, false
	}
	if tail, found := cutViewKeywords(rest, "MATERIALIZED"); found {
		rest, materialized = tail, true
	}
	if rest, ok = cutViewKeywords(rest, "VIEW"); !ok {
		return "", false, false
	}
	if tail, found := cutViewKeywords(rest, "IF", "EXISTS"); found {
		rest = tail
	}
	return strings.TrimSpace(rest), materialized, true
}

// cutViewKeywords consumes keywords, in order, from the front of s.
// Leading whitespace before each keyword is skipped and the comparison
// is case-insensitive. Each keyword must end on a word boundary, so
// "VIEWS" does not match "VIEW". It is all-or-nothing: when any keyword
// is missing, ok is false and the caller should keep using s.
func cutViewKeywords(s string, keywords ...string) (rest string, ok bool) {
	rest = s
	for _, kw := range keywords {
		rest = strings.TrimSpace(rest)
		// Slice the original text (never an upper-cased copy) so byte
		// offsets stay valid for non-ASCII input.
		if len(rest) < len(kw) || !strings.EqualFold(rest[:len(kw)], kw) {
			return "", false
		}
		rest = rest[len(kw):]
		if rest != "" && isViewKeywordContinuation(rest[0]) {
			return "", false
		}
	}
	return rest, true
}

// isViewKeywordContinuation reports whether b would extend a keyword
// into a longer identifier: ASCII letters, digits, underscore, or any
// byte of a multi-byte UTF-8 sequence.
func isViewKeywordContinuation(b byte) bool {
	switch {
	case b == '_', b >= 0x80:
		return true
	case b >= '0' && b <= '9':
		return true
	case b >= 'a' && b <= 'z', b >= 'A' && b <= 'Z':
		return true
	}
	return false
}
// parseViewQuotedName reads a (possibly dotted, possibly backtick-
// quoted) view name from the front of s and returns it with backticks
// removed, plus the trimmed remainder of s.
//
// Accepted shapes include `proj.ds.view` quoted as a whole, per-segment
// quoting such as `proj`.ds.`view`, and plain ds.view. An unquoted
// segment ends at ASCII whitespace, '(' (a view column list), ';', '.'
// or a backtick, so trailing punctuation is never folded into the name.
// ok is false when no name is present or a backtick is unterminated.
func parseViewQuotedName(s string) (name, rest string, ok bool) {
	s = strings.TrimSpace(s)
	var path strings.Builder
	i := 0
	for i < len(s) {
		if s[i] == '`' {
			closing := strings.IndexByte(s[i+1:], '`')
			if closing < 0 {
				return "", "", false
			}
			path.WriteString(s[i+1 : i+1+closing])
			i += closing + 2
		} else {
			start := i
			for i < len(s) && !isViewNameTerminator(s[i]) {
				i++
			}
			path.WriteString(s[start:i])
		}
		// Only a '.' directly after a segment continues the path.
		if i >= len(s) || s[i] != '.' {
			break
		}
		path.WriteByte('.')
		i++
	}
	if path.Len() == 0 {
		return "", "", false
	}
	return path.String(), strings.TrimSpace(s[i:]), true
}

// isViewNameTerminator reports whether b ends an unquoted name segment.
// Only ASCII bytes terminate, so multi-byte UTF-8 characters are never
// split in the middle.
func isViewNameTerminator(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\r', '\v', '\f', '.', '`', '(', ';':
		return true
	}
	return false
}
// splitViewTableName splits a dotted view name into project, dataset
// and table ids. A one-part name takes projectID and defaultDatasetID
// as defaults; a two-part name is dataset.table under projectID. With
// three or more parts the last two are dataset and table and everything
// before them is the project, so a project id that itself contains
// dots (e.g. a domain-scoped "example.com:proj") stays intact.
func splitViewTableName(projectID, defaultDatasetID, name string) (project, dataset, table string) {
	parts := strings.Split(name, ".")
	n := len(parts)
	switch n {
	case 1:
		return projectID, defaultDatasetID, parts[0]
	case 2:
		return projectID, parts[0], parts[1]
	default:
		return strings.Join(parts[:n-2], "."), parts[n-2], parts[n-1]
	}
}
// parseViewQueryFromRest extracts the view's query text from whatever
// follows the view name: it skips a leading OPTIONS(...) clause, finds
// the first top-level AS keyword, and returns everything after it with
// surrounding whitespace trimmed. The bool is false when there is no
// top-level AS or nothing follows it.
func parseViewQueryFromRest(rest string) (string, bool) {
	tail := strings.TrimSpace(skipViewOptionsClause(strings.TrimSpace(rest)))
	after, found := findTopLevelAS(tail)
	if !found {
		return "", false
	}
	if body := strings.TrimSpace(tail[after:]); body != "" {
		return body, true
	}
	return "", false
}
// skipViewOptionsClause strips any number of leading OPTIONS(...)
// clauses from rest and returns what follows, trimmed. A clause is only
// recognised when the opening parenthesis directly follows the keyword
// ("OPTIONS(" in any letter case); "OPTIONS (" with a space in between
// is left in place. An unbalanced clause is also left in place.
func skipViewOptionsClause(rest string) string {
	remaining := strings.TrimSpace(rest)
	for strings.HasPrefix(strings.ToUpper(remaining), "OPTIONS(") {
		paren := strings.Index(remaining, "(")
		if paren < 0 {
			return remaining
		}
		_, after, balanced := scanViewBalanced(remaining[paren:], '(', ')')
		if !balanced {
			return remaining
		}
		remaining = strings.TrimSpace(after)
	}
	return remaining
}
// findTopLevelAS scans s for the first AS keyword that sits outside any
// quoted span, outside (...) / [...] nesting, and outside <...>
// nesting. On success it returns the byte offset just past the keyword
// (keyword index + 2) and ok=true; otherwise (0, false).
func findTopLevelAS(s string) (after int, ok bool) {
// depth counts open '(' / '[' brackets; angle counts open '<'.
depth := 0
angle := 0
// inQuote holds the active quote byte (', " or `), or 0 when outside
// a quoted span.
inQuote := byte(0)
for i := 0; i < len(s); i++ {
c := s[i]
if inQuote != 0 {
// Inside a quoted span a backslash skips the next byte, so an
// escaped quote does not end the span.
if c == '\\' && i+1 < len(s) {
i++
continue
}
if c == inQuote {
inQuote = 0
}
continue
}
switch c {
case '\'', '"', '`':
inQuote = c
case '<':
angle++
case '>':
// Unmatched closers are ignored rather than driving the counter
// negative.
if angle > 0 {
angle--
}
case '(', '[':
depth++
case ')', ']':
if depth > 0 {
depth--
}
}
// The keyword test runs after the counters are updated for the
// current byte, so it only fires when both are back at zero.
if depth == 0 && angle == 0 && isViewASKeywordAt(s, i) {
return i + 2, true
}
}
return 0, false
}
// isViewASKeywordAt reports whether the two bytes of s starting at i
// spell "AS" (any letter case) as a standalone word, i.e. neither the
// byte before nor the byte after is an identifier character.
func isViewASKeywordAt(s string, i int) bool {
	end := i + 2
	if end > len(s) {
		return false
	}
	if !strings.EqualFold(s[i:end], "AS") {
		return false
	}
	precededByIdent := i > 0 && isViewIdentChar(s[i-1])
	followedByIdent := end < len(s) && isViewIdentChar(s[end])
	return !precededByIdent && !followedByIdent
}
// isViewIdentChar reports whether b counts as part of an identifier:
// an underscore, or a byte that unicode classifies as a letter or digit
// when widened to a rune.
func isViewIdentChar(b byte) bool {
	if b == '_' {
		return true
	}
	r := rune(b)
	return unicode.IsDigit(r) || unicode.IsLetter(r)
}
// scanViewBalanced expects s to begin with the open delimiter and scans
// to its matching close delimiter, honouring nesting and skipping over
// quoted spans (', " or `, with backslash escapes). It returns the text
// between the outer delimiters, the trimmed text after the closer, and
// ok=true. ok is false when s does not start with open or the
// delimiters never balance.
func scanViewBalanced(s string, open, close byte) (inner, rest string, ok bool) {
	if s == "" || s[0] != open {
		return "", "", false
	}
	var (
		nesting int
		quote   byte
	)
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		if quote != 0 {
			switch {
			case ch == '\\' && pos+1 < len(s):
				pos++ // skip the escaped byte
			case ch == quote:
				quote = 0
			}
			continue
		}
		switch {
		case ch == '\'' || ch == '"' || ch == '`':
			quote = ch
		case ch == open:
			nesting++
		case ch == close:
			nesting--
			if nesting == 0 {
				return s[1:pos], strings.TrimSpace(s[pos+1:]), true
			}
		}
	}
	return "", "", false
}
package jobs
import (
"strconv"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// InfoSchemaJobsDataset is the internal dataset used when rewriting
// INFORMATION_SCHEMA.JOBS* queries to a catalog table the engine can scan.
const InfoSchemaJobsDataset = "_bqemu_jobs"
// InfoSchemaJobsTable is the table id holding materialized job rows.
const InfoSchemaJobsTable = "JOBS"
// Column-name and type-name literals shared by InfoSchemaJobsSchema and
// the row builders in this file, kept as constants so the schema and
// the materialized rows cannot drift apart on spelling.
const (
// infoSchemaColProjectID is used both as a top-level column and as a
// member of the destination_table struct.
infoSchemaColProjectID = "project_id"
infoSchemaTypeString = "STRING"
infoSchemaTypeTimestamp = "TIMESTAMP"
infoSchemaTypeInt64 = "INT64"
infoSchemaTypeStruct = "STRUCT"
)
// InfoSchemaJobsSchema returns the column layout of the materialized
// JOBS table: thirteen scalar columns followed by three STRUCT columns
// (destination_table, error_result, dml_statistics). A fresh schema
// value is built on every call.
func InfoSchemaJobsSchema() *enginepb.TableSchema {
	scalar := func(name, typ string) *enginepb.FieldSchema {
		return &enginepb.FieldSchema{Name: name, Type: typ}
	}
	record := func(name string, members ...*enginepb.FieldSchema) *enginepb.FieldSchema {
		return &enginepb.FieldSchema{Name: name, Type: infoSchemaTypeStruct, Fields: members}
	}
	return &enginepb.TableSchema{Fields: []*enginepb.FieldSchema{
		scalar("job_id", infoSchemaTypeString),
		scalar("creation_time", infoSchemaTypeTimestamp),
		scalar("start_time", infoSchemaTypeTimestamp),
		scalar("end_time", infoSchemaTypeTimestamp),
		scalar("state", infoSchemaTypeString),
		scalar("job_type", infoSchemaTypeString),
		scalar(infoSchemaColProjectID, infoSchemaTypeString),
		scalar("query", infoSchemaTypeString),
		scalar("statement_type", infoSchemaTypeString),
		scalar("user_email", infoSchemaTypeString),
		scalar("parent_job_id", infoSchemaTypeString),
		scalar("total_bytes_processed", infoSchemaTypeInt64),
		scalar("cache_hit", "BOOL"),
		record("destination_table",
			scalar(infoSchemaColProjectID, infoSchemaTypeString),
			scalar("dataset_id", infoSchemaTypeString),
			scalar("table_id", infoSchemaTypeString),
		),
		record("error_result",
			scalar("reason", infoSchemaTypeString),
			scalar("message", infoSchemaTypeString),
		),
		record("dml_statistics",
			scalar("inserted_row_count", infoSchemaTypeInt64),
			scalar("deleted_row_count", infoSchemaTypeInt64),
			scalar("updated_row_count", infoSchemaTypeInt64),
		),
	}}
}
// InfoSchemaJobRows converts the jobs reg lists for projectID (using
// zero-valued ListOptions) into map rows keyed by the column names of
// InfoSchemaJobsSchema. A nil registry yields nil. The second value
// returned by ListByProject is discarded.
func InfoSchemaJobRows(reg *Registry, projectID string) []map[string]any {
	if reg == nil {
		return nil
	}
	listed, _ := reg.ListByProject(projectID, ListOptions{})
	rows := make([]map[string]any, 0, len(listed))
	for _, job := range listed {
		rows = append(rows, infoSchemaRowFromJob(job))
	}
	return rows
}
// infoSchemaRowFromJob flattens one Job into a row keyed by the
// InfoSchemaJobsSchema column names. Scalar columns are always present
// (cache_hit is always false); the three struct columns
// (destination_table, error_result, dml_statistics) are only set when
// the job carries the corresponding data. A nil job yields an empty
// row.
func infoSchemaRowFromJob(j *Job) map[string]any {
	if j == nil {
		return map[string]any{}
	}
	stats := &j.Statistics
	row := make(map[string]any, 16)
	row["job_id"] = j.JobReference.JobID
	row["creation_time"] = millisToTimestamp(stats.CreationTime)
	row["start_time"] = millisToTimestamp(stats.StartTime)
	row["end_time"] = millisToTimestamp(stats.EndTime)
	row["state"] = j.Status.State
	row["job_type"] = jobTypeFromConfiguration(j.Configuration)
	row[infoSchemaColProjectID] = j.JobReference.ProjectID
	row["query"] = queryTextFromConfiguration(j.Configuration)
	row["statement_type"] = statementTypeFromJob(j)
	row["user_email"] = j.UserEmail
	row["parent_job_id"] = parentJobID(j)
	row["total_bytes_processed"] = parseInt64OrZero(stats.TotalBytesProcessed)
	row["cache_hit"] = false

	if dest := destinationTableFromConfiguration(j.Configuration); dest != nil {
		row["destination_table"] = dest
	}
	if failure := j.Status.ErrorResult; failure != nil {
		row["error_result"] = map[string]any{
			"reason":  failure.Reason,
			"message": failure.Message,
		}
	}
	if dml := dmlStatsFromJob(j); dml != nil {
		row["dml_statistics"] = dml
	}
	return row
}
// parentJobID returns the job's parent id, preferring the registry's
// own Job.ParentJobID link and falling back to
// Statistics.ParentJobID when that link is empty.
func parentJobID(j *Job) string {
	if own := j.ParentJobID; own != "" {
		return own
	}
	return j.Statistics.ParentJobID
}
// jobTypeFromConfiguration returns the job type for cfg. An explicit
// cfg.JobType wins (upper-cased); otherwise the type is inferred from
// the first populated sub-configuration, checked in the order Query,
// Load, Copy, Extract. A nil or empty configuration yields "".
func jobTypeFromConfiguration(cfg *JobConfiguration) string {
	if cfg == nil {
		return ""
	}
	if explicit := cfg.JobType; explicit != "" {
		return strings.ToUpper(explicit)
	}
	if cfg.Query != nil {
		return "QUERY"
	}
	if cfg.Load != nil {
		return "LOAD"
	}
	if cfg.Copy != nil {
		return "COPY"
	}
	if cfg.Extract != nil {
		return "EXTRACT"
	}
	return ""
}
// queryTextFromConfiguration returns the SQL text of a query job, or ""
// when cfg is nil or has no query sub-configuration.
func queryTextFromConfiguration(cfg *JobConfiguration) string {
	if cfg != nil && cfg.Query != nil {
		return cfg.Query.Query
	}
	return ""
}
// statementTypeFromJob returns the job's statement type, preferring the
// cached query result and falling back to Statistics.Query. It returns
// "" when neither source has one.
func statementTypeFromJob(j *Job) string {
	if cached := j.Result; cached != nil && cached.StatementType != "" {
		return cached.StatementType
	}
	if q := j.Statistics.Query; q != nil && q.StatementType != "" {
		return q.StatementType
	}
	return ""
}
// destinationTableFromConfiguration returns the destination table of
// cfg as a project_id / dataset_id / table_id map. The query, load and
// copy sub-configurations are consulted in that order and the first one
// with a destination table wins. It returns nil when cfg is nil or
// none of them names a destination.
func destinationTableFromConfiguration(cfg *JobConfiguration) map[string]any {
	if cfg == nil {
		return nil
	}
	var dest *bqtypes.TableReference
	if q := cfg.Query; q != nil && q.DestinationTable != nil {
		dest = q.DestinationTable
	} else if l := cfg.Load; l != nil && l.DestinationTable != nil {
		dest = l.DestinationTable
	} else if c := cfg.Copy; c != nil && c.DestinationTable != nil {
		dest = c.DestinationTable
	}
	if dest == nil {
		return nil
	}
	return map[string]any{
		infoSchemaColProjectID: dest.ProjectID,
		"dataset_id":           dest.DatasetID,
		"table_id":             dest.TableID,
	}
}
// dmlStatsFromJob returns the inserted / deleted / updated row counts
// from the job's cached result as int64 values, or nil when the job has
// no cached result or no DML statistics. Counts that are empty or fail
// to parse come out as 0.
func dmlStatsFromJob(j *Job) map[string]any {
	if j.Result == nil || j.Result.DmlStats == nil {
		return nil
	}
	counts := j.Result.DmlStats
	return map[string]any{
		"inserted_row_count": parseInt64OrZero(counts.InsertedRowCount),
		"deleted_row_count":  parseInt64OrZero(counts.DeletedRowCount),
		"updated_row_count":  parseInt64OrZero(counts.UpdatedRowCount),
	}
}
// millisToTimestamp converts a decimal epoch-milliseconds string into a
// "YYYY-MM-DD hh:mm:ss[.ffffff] UTC" string (trailing fractional zeros
// are dropped). It returns nil for blank or unparseable input. The
// input is not trimmed before parsing, so surrounding whitespace also
// yields nil.
func millisToTimestamp(ms string) any {
	if strings.TrimSpace(ms) != "" {
		if epochMillis, err := strconv.ParseInt(ms, 10, 64); err == nil {
			return time.UnixMilli(epochMillis).UTC().Format("2006-01-02 15:04:05.999999 UTC")
		}
	}
	return nil
}
// parseInt64OrZero parses s as a base-10 int64 and returns 0 for blank,
// malformed or out-of-range input. The input is not trimmed before
// parsing, so surrounding whitespace also yields 0.
func parseInt64OrZero(s string) int64 {
	if strings.TrimSpace(s) != "" {
		if parsed, err := strconv.ParseInt(s, 10, 64); err == nil {
			return parsed
		}
	}
	return 0
}
// Package jobs is the gateway-side, in-memory record of every
// BigQuery job the emulator has accepted in this process. It feeds
// the synchronous `jobs.query` response (the `jobReference` and
// timing statistics it must emit alongside rows) and is the source
// of truth `jobs.get` / `jobs.list` will read from once those land.
//
// Scope today:
//
// - One process-local Registry per gateway. State is volatile;
// restarts wipe the table. Spanner-emulator does the same with
// its in-memory metadata catalog.
// - Jobs are minted by `jobs.query` (the sync query API) and the
// sync slice of `jobs.insert` (query / load / copy / extract).
// Load / copy / extract insert paths dispatch and round-trip
// configuration but defer byte-level work to plans tp08-04/05.
// - Jobs are recorded as `DONE` straight away. The emulator runs
// each query synchronously, so a pending/running window never
// exists on the wire from the caller's perspective. Async
// execution lands later when DML / long-running jobs need real
// lifecycle transitions.
//
// The shape of `Job`, `Status`, and `Statistics` mirrors the subset of
// `https://docs.cloud.google.com/bigquery/docs/reference/rest/v2/jobs#Job`
// the emulator emits today. JSON tags match the upstream wire field
// names so a stored `*Job` round-trips through `jobs.get` without an
// extra translation layer.
package jobs
import (
"encoding/json"
"fmt"
"slices"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// JobKind is the value of the `kind` field on a BigQuery Job
// resource. Stable across all job types (query / load / copy /
// extract); the per-configuration discriminator lives under
// `configuration.{query,load,...}` on a real Job, which the
// emulator models with JobConfiguration.
const JobKind = "bigquery#job"
// JobState mirrors the upstream `Job.status.state` enum: PENDING
// (admitted but not yet scheduled), RUNNING (work in progress),
// DONE (terminal -- success or failure determined by `errorResult`).
const (
JobStatePending = "PENDING"
JobStateRunning = "RUNNING"
JobStateDone = "DONE"
)
// stateFilterAliases maps the lowercase wire spelling BigQuery
// accepts on `?stateFilter=` query parameters to the canonical
// upper-case `Status.State` value stored in the registry. The
// upstream API documents the parameter values as `pending`,
// `running`, and `done`; the response stamps the uppercase variant.
// Centralized here so `ListByProject` and tests share one source.
var stateFilterAliases = map[string]string{
"pending": JobStatePending,
"running": JobStateRunning,
"done": JobStateDone,
}
// Status mirrors the upstream `JobStatus` resource. ErrorResult is
// populated only when the job terminated with an error; Errors is
// a (potentially empty) list of warnings/errors collected during
// execution. Both are kept omitempty so a successful `jobs.query`
// reply doesn't carry empty arrays / null sentinel objects.
//
// CancelRequested mirrors the upstream `JobStatus.cancelRequested`
// flag the `JobCancel` handler stamps on the response. The gateway
// runs every job synchronously today so the flag flips to true at
// the same instant the entry's state moves to DONE; the field is
// omitempty so jobs that were never cancelled keep the same compact
// wire shape `jobs.query` emits.
type Status struct {
// State is one of the JobState* constants.
State string `json:"state"`
// ErrorResult is non-nil only when the job terminated with an error.
ErrorResult *bqtypes.ErrorProto `json:"errorResult,omitempty"`
// Errors lists warnings/errors collected during execution; may be empty.
Errors []bqtypes.ErrorProto `json:"errors,omitempty"`
// CancelRequested is omitted from the JSON encoding when false.
CancelRequested bool `json:"cancelRequested,omitempty"`
}
// JobConfiguration mirrors the subset of the upstream
// `JobConfiguration` resource the gateway round-trips through the
// registry. The per-type sub-objects (`Query`, `Load`, `Copy`,
// `Extract`) are the dispatch discriminator at `jobs.insert` time;
// everything else round-trips opaquely so a subsequent `jobs.get`
// echoes back the same shape the caller posted.
type JobConfiguration struct {
JobType string `json:"jobType,omitempty"` // QUERY | LOAD | COPY | EXTRACT
// Query, Load, Copy and Extract are the per-type sub-objects; each is
// nil (and omitted from JSON) when not set.
Query *JobConfigurationQuery `json:"query,omitempty"`
Load *JobConfigurationLoad `json:"load,omitempty"`
Copy *JobConfigurationCopy `json:"copy,omitempty"`
Extract *JobConfigurationExtract `json:"extract,omitempty"`
// Labels and DryRun are carried on the configuration as posted.
Labels map[string]string `json:"labels,omitempty"`
DryRun bool `json:"dryRun,omitempty"`
}
// JobConfigurationQuery is the per-query slice of a JobConfiguration.
// Only fields the gateway currently echoes back on `jobs.get` are
// modelled (including the destination table, write disposition,
// clustering, partitioning and encryption settings listed below); the
// remaining long tail (scheduling, ...) is deferred until a handler
// reads it.
type JobConfigurationQuery struct {
// Query is the SQL text; it has no omitempty, so it is always encoded.
Query string `json:"query"`
DefaultDataset *bqtypes.DatasetReference `json:"defaultDataset,omitempty"`
// UseLegacySQL is a pointer so an unset value can be told apart from
// an explicit false.
UseLegacySQL *bool `json:"useLegacySql,omitempty"`
ParameterMode string `json:"parameterMode,omitempty"`
QueryParameters []bqtypes.QueryParameter `json:"queryParameters,omitempty"`
TableDefinitions map[string]bqtypes.ExternalDataConfiguration `json:"tableDefinitions,omitempty"`
DestinationTable *bqtypes.TableReference `json:"destinationTable,omitempty"`
// WriteDisposition is decoded by UnmarshalJSON, which also accepts a
// one-element string array on the wire.
WriteDisposition string `json:"writeDisposition,omitempty"`
SchemaUpdateOptions []string `json:"schemaUpdateOptions,omitempty"`
Clustering *bqtypes.Clustering `json:"clustering,omitempty"`
TimePartitioning *bqtypes.TimePartitioning `json:"timePartitioning,omitempty"`
DestinationEncryptionConfiguration *bqtypes.EncryptionConfiguration `json:"destinationEncryptionConfiguration,omitempty"`
CreateSession bool `json:"createSession,omitempty"`
ConnectionProperties []bqtypes.ConnectionProperty `json:"connectionProperties,omitempty"`
}
// UnmarshalJSON decodes a query configuration, accepting
// writeDisposition either as a JSON string or as a one-element string
// array (node relaxColumnQueryAppend sample). All other fields decode
// with the default struct rules.
func (c *JobConfigurationQuery) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, so decoding into it
	// does not recurse back into this UnmarshalJSON.
	type plain JobConfigurationQuery
	var decoded struct {
		plain
		// Shadows plain.WriteDisposition so the raw bytes are captured.
		WriteDisposition json.RawMessage `json:"writeDisposition,omitempty"`
	}
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	*c = JobConfigurationQuery(decoded.plain)
	if len(decoded.WriteDisposition) > 0 {
		disposition, err := bqtypes.UnmarshalWriteDisposition(decoded.WriteDisposition)
		if err != nil {
			return err
		}
		c.WriteDisposition = disposition
	}
	return nil
}
// JobConfigurationLoad is the per-load slice of a JobConfiguration.
// Fields mirror the minimum upstream REST shape thirdparty samples
// exercise; format readers and GCS byte I/O land in plan tp08-04.
type JobConfigurationLoad struct {
SourceURIs []string `json:"sourceUris,omitempty"`
DestinationTable *bqtypes.TableReference `json:"destinationTable,omitempty"`
SourceFormat string `json:"sourceFormat,omitempty"`
// WriteDisposition is decoded by UnmarshalJSON, which also accepts a
// one-element string array on the wire.
WriteDisposition string `json:"writeDisposition,omitempty"`
Schema *bqtypes.TableSchema `json:"schema,omitempty"`
Autodetect bool `json:"autodetect,omitempty"`
SchemaUpdateOptions []string `json:"schemaUpdateOptions,omitempty"`
DestinationEncryptionConfiguration *bqtypes.EncryptionConfiguration `json:"destinationEncryptionConfiguration,omitempty"`
Clustering *bqtypes.Clustering `json:"clustering,omitempty"`
TimePartitioning *bqtypes.TimePartitioning `json:"timePartitioning,omitempty"`
HivePartitioningOptions *bqtypes.HivePartitioningOptions `json:"hivePartitioningOptions,omitempty"`
// skipLeadingRows is read through the SkipLeadingRows accessor.
// NOTE(review): the field is unexported, so encoding/json will not
// emit it when this struct is marshalled -- confirm whether
// `skipLeadingRows` is expected to round-trip on jobs.get.
skipLeadingRows int // set via UnmarshalJSON; REST sends int or string
}
// SkipLeadingRows reports how many leading CSV rows to skip, as decoded
// from the `skipLeadingRows` wire field. It is safe to call on a nil
// receiver, which reports 0.
func (c *JobConfigurationLoad) SkipLeadingRows() int {
	if c != nil {
		return c.skipLeadingRows
	}
	return 0
}
// UnmarshalJSON decodes a load configuration with two wire-shape
// relaxations: skipLeadingRows may be a JSON number or a decimal string
// (matching the official Python/Node client wire shape), and
// writeDisposition may be a string or a one-element string array. A
// JSON number is converted with int(), which truncates any fractional
// part. Any other skipLeadingRows type is an error.
func (c *JobConfigurationLoad) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, so decoding into it
	// does not recurse back into this UnmarshalJSON.
	type plain JobConfigurationLoad
	var decoded struct {
		plain
		SkipLeadingRows  any             `json:"skipLeadingRows,omitempty"`
		WriteDisposition json.RawMessage `json:"writeDisposition,omitempty"`
	}
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	*c = JobConfigurationLoad(decoded.plain)

	disposition, err := bqtypes.UnmarshalWriteDisposition(decoded.WriteDisposition)
	if err != nil {
		return err
	}
	if disposition != "" {
		c.WriteDisposition = disposition
	}

	switch rows := decoded.SkipLeadingRows.(type) {
	case nil:
		// Absent (or JSON null): keep the zero value.
	case float64:
		c.skipLeadingRows = int(rows)
	case string:
		count, err := strconv.Atoi(rows)
		if err != nil {
			return fmt.Errorf("skipLeadingRows: %w", err)
		}
		c.skipLeadingRows = count
	default:
		return fmt.Errorf("skipLeadingRows: unsupported type %T", rows)
	}
	return nil
}
// UnmarshalJSON decodes a copy configuration, accepting
// writeDisposition as a string or a one-element string array. The
// disposition helper is invoked even when the field is absent, and its
// result is always assigned.
func (c *JobConfigurationCopy) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, so decoding into it
	// does not recurse back into this UnmarshalJSON.
	type plain JobConfigurationCopy
	var decoded struct {
		plain
		// Shadows plain.WriteDisposition so the raw bytes are captured.
		WriteDisposition json.RawMessage `json:"writeDisposition,omitempty"`
	}
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}
	*c = JobConfigurationCopy(decoded.plain)
	disposition, err := bqtypes.UnmarshalWriteDisposition(decoded.WriteDisposition)
	if err == nil {
		c.WriteDisposition = disposition
	}
	return err
}
// JobConfigurationCopy is the per-copy slice of a JobConfiguration.
type JobConfigurationCopy struct {
// SourceTable and SourceTables are separate wire fields (a single
// reference and a list); both are optional.
SourceTable *bqtypes.TableReference `json:"sourceTable,omitempty"`
SourceTables []bqtypes.TableReference `json:"sourceTables,omitempty"`
DestinationTable *bqtypes.TableReference `json:"destinationTable,omitempty"`
// WriteDisposition is decoded by UnmarshalJSON, which also accepts a
// one-element string array on the wire.
WriteDisposition string `json:"writeDisposition,omitempty"`
CreateDisposition string `json:"createDisposition,omitempty"`
DestinationEncryptionConfiguration *bqtypes.EncryptionConfiguration `json:"destinationEncryptionConfiguration,omitempty"`
// OperationType is COPY (default), SNAPSHOT, RESTORE, or CLONE per
// BigQuery JobConfigurationTableCopy.operationType.
OperationType string `json:"operationType,omitempty"`
// DestinationExpirationTime is epoch milliseconds when the destination
// table expires (decimal string on the wire).
DestinationExpirationTime string `json:"destinationExpirationTime,omitempty"`
}
// JobConfigurationExtract is the per-extract slice of a JobConfiguration.
type JobConfigurationExtract struct {
// SourceTable is the table to extract from.
SourceTable *bqtypes.TableReference `json:"sourceTable,omitempty"`
// DestinationURIs, DestinationFormat and Compression describe the
// output; all are optional and omitted from JSON when empty.
DestinationURIs []string `json:"destinationUris,omitempty"`
DestinationFormat string `json:"destinationFormat,omitempty"`
Compression string `json:"compression,omitempty"`
}
// Statistics mirrors the subset of `JobStatistics` the emulator
// currently fills in. All four timestamp / byte fields are decimal
// strings on the wire per
// docs/bigquery/docs/reference/rest/v2/jobs/get.md#JobStatistics --
// even `totalBytesProcessed`, because BigQuery REST never emits
// 64-bit integers as JSON numbers (clients use `string` decoders).
type Statistics struct {
// CreationTime, StartTime and EndTime are epoch-millisecond decimal
// strings (see millisToTimestamp for the reader side).
CreationTime string `json:"creationTime,omitempty"`
StartTime string `json:"startTime,omitempty"`
EndTime string `json:"endTime,omitempty"`
TotalBytesProcessed string `json:"totalBytesProcessed,omitempty"`
// ParentJobID is the wire-visible `parentJobId`; Job.ParentJobID is
// the separate, non-serialized registry link.
ParentJobID string `json:"parentJobId,omitempty"`
NumChildJobs string `json:"numChildJobs,omitempty"`
SessionInfo *bqtypes.SessionInfo `json:"sessionInfo,omitempty"`
// Query, Load, Copy and Extract are the per-type statistics
// sub-objects; each is nil (and omitted from JSON) when not set.
Query *bqtypes.JobStatistics2 `json:"query,omitempty"`
Load *LoadStatistics `json:"load,omitempty"`
Copy *CopyStatistics `json:"copy,omitempty"`
Extract *ExtractStatistics `json:"extract,omitempty"`
}
// LoadStatistics mirrors upstream `JobStatistics3` (statistics.load).
type LoadStatistics struct {
// All counters are string-typed and omitted from JSON when empty;
// presumably decimal strings like the other statistics -- TODO confirm.
InputFiles string `json:"inputFiles,omitempty"`
InputFileBytes string `json:"inputFileBytes,omitempty"`
OutputRows string `json:"outputRows,omitempty"`
OutputBytes string `json:"outputBytes,omitempty"`
BadRecords string `json:"badRecords,omitempty"`
}
// CopyStatistics mirrors upstream `CopyJobStatistics` (statistics.copy).
type CopyStatistics struct {
// Both counters are string-typed and omitted from JSON when empty;
// presumably decimal strings like the other statistics -- TODO confirm.
CopiedRows string `json:"copiedRows,omitempty"`
CopiedLogicalBytes string `json:"copiedLogicalBytes,omitempty"`
}
// ExtractStatistics mirrors upstream `JobStatistics4` (statistics.extract).
type ExtractStatistics struct {
// DestinationURIFileCounts is a list of string-typed counts;
// presumably one per destination URI -- TODO confirm.
DestinationURIFileCounts []string `json:"destinationUriFileCounts,omitempty"`
InputBytes string `json:"inputBytes,omitempty"`
}
// QueryResult is the cached result of a synchronous query, kept in
// the registry so a follow-up `jobs.getQueryResults` can replay the
// same schema and rows without re-running the SQL. Schema and Rows
// are stored in the BigQuery REST `f`/`v` shape so the handler can
// emit them verbatim.
//
// The registry holds the entire result set in memory; this matches
// the "single-page only" charter from
// `docs/ENGINE_POLICY.md`. Pagination
// (real `pageToken` lifecycle, cursored reads from a streaming
// engine) is deferred until long-running jobs land.
type QueryResult struct {
// Schema and Rows are the cached result in the REST `f`/`v` shape.
// The struct has no JSON tags; Job excludes it from encoding via
// `json:"-"`.
Schema *bqtypes.TableSchema
Rows []bqtypes.Row
// DmlStats is non-nil for an INSERT/UPDATE/DELETE/MERGE job and
// nil for a SELECT/DDL job. When set, `jobs.getQueryResults`
// surfaces the same `dmlStats` + `numDmlAffectedRows` envelope
// the synchronous `jobs.query` response carried, so polling
// BigQuery clients (e.g. the Go SDK's `JobIterator`) see the
// row counts on the replay too.
DmlStats *bqtypes.DmlStats
// StatementType is the canonical BigQuery REST statement-type
// string the engine trailed on the `jobs.query` response (e.g.
// `SELECT`, `INSERT`, `CREATE_TABLE`). Stashed on the cached
// result so `jobs.getQueryResults` can re-surface the same
// `Job.statistics.query.statementType` envelope on the replay
// without re-running the SQL.
StatementType string
// EmulatorRoute is the canonical lowercase-snake disposition
// string the C++ coordinator's `RouteClassifier` chose for the
// original query (`duckdb_native`, `semantic_executor`,
// `control_op`, ...). It is an emulator-internal debug field;
// `jobs.getQueryResults` only surfaces it to loopback callers
// (the call site enforces the gating via
// `middleware.IsLoopback`) so the public REST shape stays the
// same.
EmulatorRoute string
// EmulatorPhases carries per-phase timings (microseconds) from the
// engine's phase_timings trailer for loopback replay.
EmulatorPhases map[string]int64
// DdlTargetRoutine is set when a CREATE_FUNCTION /
// CREATE_PROCEDURE DDL statement registers a routine.
DdlTargetRoutine *bqtypes.RoutineReference
}
// Job is the gateway's view of a single BigQuery job. Today it's
// populated from the sync `jobs.query` path and the sync-query slice
// of `jobs.insert`; the per-type `*Statistics` sub-objects are
// deferred until a handler actually needs them.
//
// Result is the cached query result, populated by
// `CompleteQueryWithResult` and consumed by `jobs.getQueryResults`.
// It is excluded from the JSON encoding because the upstream Job
// resource has no rows/schema field; result data is only emitted
// through the dedicated `QueryResponse`/`GetQueryResultsResponse`
// shapes.
//
// ParentJobID is non-empty for script statement-level jobs spawned
// under a scripting parent, mirroring upstream's
// `Job.statistics.parentJobId`. `JobDelete` cascades by removing
// every entry whose ParentJobID matches the requested jobId so a
// scripting parent's children disappear in one call.
//
// CancelRequested mirrors the upstream `Job.status.cancelRequested`
// flag the `JobCancel` handler stamps on the response envelope. The
// gateway runs every job synchronously today so the flag flips to
// true at the same instant the entry's state moves to DONE (the
// terminal JobState; there is no separate CANCELLED state);
// once a long-running execution lane lands the flag's pre-flip
// observation window will widen.
//
// Configuration is the round-trip copy of the inbound
// `configuration` body so `jobs.get` / `jobs.list` echo back the
// same fields the caller posted at `jobs.insert` time. Sync
// `jobs.query` calls (which do not go through `jobs.insert`) leave
// it nil; clients reading those entries see no `configuration`
// field, matching the upstream behavior.
type Job struct {
// Kind is set to JobKind by CompleteQueryWithResult.
Kind string `json:"kind,omitempty"`
// ID is "<projectID>:<jobID>" as built by CompleteQueryWithResult.
ID string `json:"id,omitempty"`
// JobReference, Status and Statistics have no omitempty and are
// always encoded.
JobReference bqtypes.JobReference `json:"jobReference"`
Status Status `json:"status"`
Statistics Statistics `json:"statistics"`
Configuration *JobConfiguration `json:"configuration,omitempty"`
UserEmail string `json:"user_email,omitempty"`
// ParentJobID is the registry's link to a scripting parent. It
// stays an internal link so `JobDelete` can cascade by parent-id
// without growing a separate scripting index. JSON tag is `-` so
// this field does not appear on the wire; the wire-visible
// `parentJobId` is Statistics.ParentJobID.
// NOTE(review): confirm which writers keep the two in sync.
ParentJobID string `json:"-"`
// Result is the cached query result; excluded from JSON.
Result *QueryResult `json:"-"`
}
// Registry is a process-local jobs table keyed by jobId. Reads /
// writes are concurrency-safe via a single sync.RWMutex over an
// ordered slice + map index. We track insertion order on the side
// (the `order` slice) so `ListByProject` can hand back a deterministic
// reverse-chronological page without the caller having to sort an
// arbitrary `sync.Map` walk. The monotonic counter is bumped
// atomically so even within a single nanosecond two requests still
// see distinct ids.
//
// The map only holds successful or terminally-failed jobs today;
// the emulator does not yet maintain a pending queue (see the
// package-level doc for why DONE-on-arrival is fine).
type Registry struct {
// counter feeds the trailing sequence number of NewJobID.
counter atomic.Uint64
// mu guards jobs and order.
mu sync.RWMutex
// jobs indexes every registered job by its JobReference.JobID.
jobs map[string]*Job
// order is the insertion-ordered list of jobIds. `ListByProject`
// iterates this in reverse to produce a newest-first page, which
// matches what the BigQuery client libraries (and the upstream
// `jobs.list` default sort) display. The slice grows on
// Register / CompleteQuery and shrinks on Delete (linear scan;
// fine for the per-process volumes the emulator handles, ~10s
// of jobs in any test run).
order []string
}
// NewRegistry builds an empty registry with its job index allocated and
// ready for Register. Each gateway process gets one; tests can mint
// their own per-test for isolation without polluting a global.
func NewRegistry() *Registry {
	reg := new(Registry)
	reg.jobs = make(map[string]*Job)
	return reg
}
// NewJobID generates a jobId of the form `job_<unix_nanos>_<seq>`.
// The `job_` prefix matches the convention BigQuery and the official
// client libraries use for auto-generated ids (cf.
// `cloud.google.com/go/bigquery`'s `randomIDFn`). The trailing
// monotonic seq guarantees uniqueness even when two requests collide
// on the same nanosecond, which can happen on coarse-resolution
// clocks (Windows) under heavy concurrency.
func (r *Registry) NewJobID() string {
	// Bump the counter first, then read the clock, then assemble
	// "job_<nanos>_<seq>" in a single buffer.
	seq := r.counter.Add(1)
	id := make([]byte, 0, 48)
	id = append(id, "job_"...)
	id = strconv.AppendInt(id, time.Now().UnixNano(), 10)
	id = append(id, '_')
	id = strconv.AppendUint(id, seq, 10)
	return string(id)
}
// CompleteQuery records a query job that already finished -- the
// happy path for sync `jobs.query`. The returned Job carries a
// freshly minted jobReference plus the canonical Status / Statistics
// the caller stamps into the `QueryResponse`. The same `*Job` is
// stored in the registry so a follow-up `jobs.get` can return it
// verbatim.
//
// projectID flows from the URL path. location comes from the
// QueryRequest body (empty when the client did not specify one);
// matching BigQuery, the registry never invents a location.
// totalBytesProcessed reflects how many bytes the engine reported
// scanning -- 0 is acceptable when the engine has not wired the
// metric yet.
func (r *Registry) CompleteQuery(
	projectID, location string,
	totalBytesProcessed int64,
	start, end time.Time,
) *Job {
	// Same as CompleteQueryWithResult with no cached rows.
	var noResult *QueryResult
	return r.CompleteQueryWithResult(projectID, location, totalBytesProcessed, start, end, noResult)
}
// CompleteQueryWithResult mints a job id, builds a DONE query job for
// projectID/location and registers it. result (which may be nil) is
// attached to the Job so later readers can replay the rows without
// re-running the SQL.
//
// CreationTime and StartTime are both derived from start; EndTime
// from end. The returned pointer is the one handed to Register.
func (r *Registry) CompleteQueryWithResult(
	projectID, location string,
	totalBytesProcessed int64,
	start, end time.Time,
	result *QueryResult,
) *Job {
	jobID := r.NewJobID()
	startedAt := millisString(start)

	job := &Job{
		Kind:   JobKind,
		ID:     projectID + ":" + jobID,
		Result: result,
	}
	job.JobReference = bqtypes.JobReference{
		ProjectID: projectID,
		JobID:     jobID,
		Location:  location,
	}
	job.Status = Status{State: JobStateDone}
	job.Statistics = Statistics{
		CreationTime:        startedAt,
		StartTime:           startedAt,
		EndTime:             millisString(end),
		TotalBytesProcessed: strconv.FormatInt(totalBytesProcessed, 10),
	}

	r.Register(job)
	return job
}
// Register stores j under its JobReference.JobID and appends the id to
// the insertion-order list. Nil jobs and jobs with an empty id are
// ignored. When the id is already present the existing entry wins and
// nothing changes. Takes the write lock for the duration of the
// check-and-insert.
func (r *Registry) Register(j *Job) {
	if j == nil || j.JobReference.JobID == "" {
		return
	}
	key := j.JobReference.JobID

	r.mu.Lock()
	defer r.mu.Unlock()
	if _, taken := r.jobs[key]; !taken {
		r.jobs[key] = j
		r.order = append(r.order, key)
	}
}
// Get looks up jobID under the read lock. It returns the stored *Job
// and true when the id is registered, or (nil, false) otherwise.
func (r *Registry) Get(jobID string) (*Job, bool) {
	r.mu.RLock()
	defer r.mu.RUnlock()
	if found, present := r.jobs[jobID]; present {
		return found, true
	}
	return nil, false
}
// ListOptions captures the documented `jobs.list` query parameters
// the handler exposes today. Empty / zero-valued fields are treated
// as "no filter". PageToken is the opaque cursor `ListByProject`
// hands back; the handler does not need to interpret it.
type ListOptions struct {
// MaxResults caps the page size; values <= 0 select the default cap.
MaxResults int
// PageToken is the cursor returned by a previous ListByProject call
// (empty for the first page).
PageToken string
// ParentJobID, when non-empty, keeps only jobs whose ParentJobID
// equals it.
ParentJobID string
MinCreationTime int64 // millis since epoch; 0 = unbounded
MaxCreationTime int64 // millis since epoch; 0 = unbounded
// StateFilter lists job-state spellings to keep; it is normalized by
// normalizeStateFilters before matching.
StateFilter []string
}
// ListByProject returns one page of the jobs registered for projectID
// that pass the filters in opts, newest first (reverse insertion
// order).
//
// Pagination is cursor-based. The returned nextPageToken is the
// decimal count of matching jobs already consumed; feed it back via
// ListOptions.PageToken to fetch the following page. It is empty when
// no further matching job exists.
//
// MaxResults <= 0 selects defaultListMaxResults. A PageToken that does
// not parse as a number is treated as the first page, and a negative
// token is clamped to zero so the cursor handed back can never move
// backwards and re-serve jobs the caller has already seen.
func (r *Registry) ListByProject(projectID string, opts ListOptions) (
	jobs []*Job, nextPageToken string,
) {
	r.mu.RLock()
	defer r.mu.RUnlock()

	maxResults := opts.MaxResults
	if maxResults <= 0 {
		maxResults = defaultListMaxResults
	}
	stateFilters := normalizeStateFilters(opts.StateFilter)

	// The parse error is deliberately ignored: Atoi yields 0 for
	// malformed input, which simply restarts from the first page.
	startIdx, _ := strconv.Atoi(opts.PageToken)
	if startIdx < 0 {
		startIdx = 0
	}

	// Never pre-allocate more slots than there are jobs; MaxResults is
	// caller-controlled and may be arbitrarily large.
	capHint := maxResults
	if stored := len(r.order); stored < capHint {
		capHint = stored
	}
	jobs = make([]*Job, 0, capHint)

	skipped := 0
	// Walk `order` in reverse so the newest job comes first. The
	// cursor counts matching jobs only, so filtered-out entries never
	// advance it.
	for _, id := range slices.Backward(r.order) {
		j := r.jobs[id]
		if !jobMatchesProject(j, projectID, opts, stateFilters) {
			continue
		}
		if skipped < startIdx {
			skipped++
			continue
		}
		if len(jobs) >= maxResults {
			// At least one more match exists beyond this page.
			nextPageToken = strconv.Itoa(startIdx + len(jobs))
			return jobs, nextPageToken
		}
		jobs = append(jobs, j)
	}
	return jobs, ""
}
// defaultListMaxResults bounds the per-page result count when the
// caller leaves `MaxResults` zero or negative. Upstream's documented
// default is 50; matching it avoids surprises for clients that probe
// the emulator before passing an explicit cap.
const defaultListMaxResults = 50
// jobMatchesProject reports whether j survives every filter that
// ListByProject applies: owning project, optional parent job id,
// optional state set, and the optional creation-time window
// (milliseconds since epoch, inclusive on both ends; a zero bound is
// unbounded). Kept separate so the paging loop stays flat.
func jobMatchesProject(j *Job, projectID string, opts ListOptions, stateFilters map[string]bool) bool {
	wrongProject := j.JobReference.ProjectID != projectID
	wrongParent := opts.ParentJobID != "" && j.ParentJobID != opts.ParentJobID
	wrongState := len(stateFilters) != 0 && !stateFilters[j.Status.State]
	if wrongProject || wrongParent || wrongState {
		return false
	}

	// An unparsable creation time is treated as 0 (error ignored).
	createdAt, _ := strconv.ParseInt(j.Statistics.CreationTime, 10, 64)
	tooOld := opts.MinCreationTime != 0 && createdAt < opts.MinCreationTime
	tooNew := opts.MaxCreationTime != 0 && createdAt > opts.MaxCreationTime
	return !tooOld && !tooNew
}
// normalizeStateFilters converts caller-provided state spellings into
// a set keyed by the canonical `Status.State` value. Each entry is
// trimmed and lower-cased before being looked up in
// stateFilterAliases; spellings with no alias are dropped.
//
// The result is nil both when `in` is empty and when none of its
// entries is recognized, so in either case the per-entry filter skips
// the state check entirely.
func normalizeStateFilters(in []string) map[string]bool {
	var set map[string]bool
	for _, raw := range in {
		key := strings.ToLower(strings.TrimSpace(raw))
		canon, known := stateFilterAliases[key]
		if !known {
			continue
		}
		if set == nil {
			set = make(map[string]bool, len(in))
		}
		set[canon] = true
	}
	return set
}
// Cancel marks the named job as cancel-requested and returns it.
//
// A job that is not yet DONE is moved to DONE, and its EndTime is
// stamped with the current UTC time if it was empty. A job that is
// already DONE keeps its status and statistics; only CancelRequested
// is set. The call is therefore safe to repeat.
//
// Returns (nil, false) when jobID is not registered so the caller can
// produce its own not-found response.
func (r *Registry) Cancel(jobID string) (*Job, bool) {
	r.mu.Lock()
	defer r.mu.Unlock()

	job, known := r.jobs[jobID]
	if !known {
		return nil, false
	}
	job.Status.CancelRequested = true
	if job.Status.State == JobStateDone {
		return job, true
	}
	job.Status.State = JobStateDone
	if job.Statistics.EndTime == "" {
		job.Statistics.EndTime = millisString(time.Now().UTC())
	}
	return job, true
}
// Delete drops jobID from the registry and, in the same locked
// section, every remaining job whose ParentJobID equals jobID (direct
// children only). Reports false, without touching anything, when
// jobID is not registered.
func (r *Registry) Delete(jobID string) bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	if _, known := r.jobs[jobID]; !known {
		return false
	}
	r.removeLocked(jobID)

	// removeLocked rewrites r.order in place, so iterate a private
	// copy while cascading to children.
	snapshot := make([]string, len(r.order))
	copy(snapshot, r.order)
	for _, id := range snapshot {
		child, present := r.jobs[id]
		if !present || child.ParentJobID != jobID {
			continue
		}
		r.removeLocked(id)
	}
	return true
}
// removeLocked deletes id from the `jobs` map and removes its first
// occurrence from `order`. The caller must already hold `mu` for
// writing. Removal from the slice is a linear scan followed by an
// in-place shift, which is adequate for the registry sizes the
// emulator sees.
func (r *Registry) removeLocked(id string) {
	delete(r.jobs, id)
	for pos := range r.order {
		if r.order[pos] != id {
			continue
		}
		// Shift the tail left by one and drop the duplicated last slot.
		copy(r.order[pos:], r.order[pos+1:])
		r.order = r.order[:len(r.order)-1]
		return
	}
}
// millisString renders t as decimal milliseconds since the Unix
// epoch, the string form used for the timestamps stored in
// `Statistics` (creation, start and end times).
func millisString(t time.Time) string {
	ms := t.UnixMilli()
	return strconv.FormatInt(ms, 10)
}
// FormatDryRunBytesProcessed renders a dry-run byte estimate as a
// decimal string. Estimates of zero or less are reported as "1" so
// the counter is never empty or zero, which clients would read as
// missing.
func FormatDryRunBytesProcessed(estimated int64) string {
	if estimated > 0 {
		return strconv.FormatInt(estimated, 10)
	}
	return "1"
}
// ApplyDryRunStatistics fills in the statistics of a dry-run query
// job: start and end timestamps plus the estimated byte count, which
// is written both to statistics.totalBytesProcessed and to a fresh
// statistics.query.totalBytesProcessed envelope. A nil job is
// ignored.
func ApplyDryRunStatistics(job *Job, estimated int64, start, end time.Time) {
	if job == nil {
		return
	}
	rendered := FormatDryRunBytesProcessed(estimated)

	stats := &job.Statistics
	stats.StartTime = millisString(start)
	stats.EndTime = millisString(end)
	stats.TotalBytesProcessed = rendered
	stats.Query = &bqtypes.JobStatistics2{TotalBytesProcessed: rendered}
}
package load
import (
"context"
"errors"
"fmt"
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// Result captures load-job statistics for jobs.insert responses.
type Result struct {
// InputFiles is the number of source objects (or inline payloads)
// that were parsed.
InputFiles int
// InputFileBytes is the combined size in bytes of the parsed sources.
InputFileBytes int64
// OutputRows is the row count reported by the catalog insert.
OutputRows int64
// OutputBytes is filled by execute with the same total as
// InputFileBytes.
OutputBytes int64
}
// Execute runs a synchronous LOAD job whose data comes from
// cfg.SourceURIs and writes the rows through the engine catalog.
// defaultProject is used when the destination table names no project.
func Execute(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationLoad,
	defaultProject string,
) (Result, error) {
	var noInline [][]byte
	return execute(ctx, catalog, cfg, defaultProject, noInline)
}
// ExecuteFromBytes runs a LOAD job whose single source is the
// uploaded payload `media` rather than cfg.SourceURIs.
func ExecuteFromBytes(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationLoad,
	defaultProject string, media []byte,
) (Result, error) {
	inline := make([][]byte, 1)
	inline[0] = media
	return execute(ctx, catalog, cfg, defaultProject, inline)
}
// execute is the shared body of Execute and ExecuteFromBytes.
//
// Steps, in order: validate cfg; resolve the destination (falling
// back to defaultProject); pick a parse schema (the request schema,
// or the existing destination schema when none is supplied and
// autodetect is off); parse all sources; ensure the dataset exists;
// resolve the destination schema; apply the write disposition; insert
// the rows. Any failure returns a zero Result and the error.
func execute(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationLoad,
	defaultProject string, inline [][]byte,
) (Result, error) {
	var none Result
	switch {
	case cfg == nil:
		return none, errors.New("load configuration is required")
	case cfg.DestinationTable == nil || cfg.DestinationTable.TableID == "":
		return none, errors.New("destinationTable.tableId is required")
	case len(cfg.SourceURIs) == 0 && len(inline) == 0:
		return none, errors.New("sourceUris or upload media is required")
	}

	dest := cfg.DestinationTable
	projectID := dest.ProjectID
	if projectID == "" {
		projectID = defaultProject
	}
	datasetID, tableID := dest.DatasetID, dest.TableID

	parseSchema := cfg.Schema
	schemaMissing := parseSchema == nil || len(parseSchema.Fields) == 0
	if schemaMissing && !cfg.Autodetect {
		parseSchema = existingDestinationSchema(ctx, catalog, projectID, datasetID, tableID)
	}

	// Sources are parsed before any catalog mutation so a bad payload
	// fails before the dataset or table is touched.
	parsed, totalBytes, inputFiles, err := parseLoadSources(ctx, cfg, inline, parseSchema)
	if err != nil {
		return none, err
	}
	if err = EnsureDataset(ctx, catalog, projectID, datasetID); err != nil {
		return none, err
	}

	protoSchema, err := resolveDestinationSchema(ctx, catalog, cfg, projectID, datasetID, tableID, parsed.Schema)
	if err != nil {
		return none, err
	}
	if protoSchema == nil {
		protoSchema = SchemaToProto(parsed.Schema)
	}
	if err = applyWriteDisposition(ctx, catalog, cfg, projectID, datasetID, tableID, protoSchema); err != nil {
		return none, err
	}

	target := seed.TableRef{ProjectID: projectID, DatasetID: datasetID, TableID: tableID}
	inserted, err := seed.NewCatalogApplier(catalog).InsertRows(ctx, target, protoSchema, parsed.Rows)
	if err != nil {
		return none, err
	}

	stats := Result{
		InputFiles:     inputFiles,
		InputFileBytes: totalBytes,
		OutputRows:     int64(inserted),
		OutputBytes:    totalBytes,
	}
	return stats, nil
}
// parseLoadSources parses the job's input. Inline payloads take
// precedence: when any are present cfg.SourceURIs is not consulted.
// Returns the merged rows, the total source bytes and the number of
// sources parsed.
func parseLoadSources(ctx context.Context, cfg *jobs.JobConfigurationLoad, inline [][]byte,
	parseSchema *bqtypes.TableSchema,
) (parsed ParsedRows, totalBytes int64, inputFiles int, err error) {
	if len(inline) == 0 {
		return parseURISources(ctx, cfg, parseSchema)
	}
	return parseInlineSources(cfg, inline, parseSchema)
}
// parseInlineSources parses each inline payload with cfg's source
// format and merges the results; the first payload's parse result
// seeds the accumulator. Returns the merged rows, the summed payload
// size and the payload count. The first parse error aborts with zero
// values.
func parseInlineSources(cfg *jobs.JobConfigurationLoad, inline [][]byte,
	parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
	var (
		merged    ParsedRows
		byteCount int64
	)
	for idx := range inline {
		payload := inline[idx]
		byteCount += int64(len(payload))
		chunk, err := ParseSource(cfg.SourceFormat, payload, parseSchema, cfg.SkipLeadingRows(), cfg.Autodetect)
		if err != nil {
			return ParsedRows{}, 0, 0, err
		}
		merged = mergeParsedChunk(merged, chunk, idx == 0)
	}
	return merged, byteCount, len(inline), nil
}
// parseURISources loads and parses everything named by
// cfg.SourceURIs.
//
// Hive-partitioned configs are delegated to parseHiveURISources. The
// source format is the trimmed, upper-cased cfg.SourceFormat, or, if
// that is empty, whatever inferSourceFormatFromURIs derives from the
// URIs; Datastore backups are delegated to
// parseDatastoreBackupSources with a copy of cfg carrying the
// resolved format. Otherwise wildcards are expanded and each object
// is fetched, parsed and merged in order.
func parseURISources(ctx context.Context, cfg *jobs.JobConfigurationLoad,
	parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
	if cfg.HivePartitioningOptions != nil {
		return parseHiveURISources(ctx, cfg, parseSchema)
	}

	format := strings.ToUpper(strings.TrimSpace(cfg.SourceFormat))
	if format == "" {
		format = inferSourceFormatFromURIs(cfg.SourceURIs)
	}
	if format == sourceFormatDatastoreBackup {
		// Work on a copy so the caller's configuration is not mutated.
		withFormat := *cfg
		withFormat.SourceFormat = format
		return parseDatastoreBackupSources(ctx, &withFormat, parseSchema)
	}

	expanded, err := ExpandSourceURIs(ctx, cfg.SourceURIs)
	if err != nil {
		return ParsedRows{}, 0, 0, err
	}

	var (
		merged    ParsedRows
		byteCount int64
	)
	for idx, uri := range expanded {
		payload, fetchErr := FetchSource(ctx, uri)
		if fetchErr != nil {
			return ParsedRows{}, 0, 0, fetchErr
		}
		byteCount += int64(len(payload))
		chunk, parseErr := ParseSource(format, payload, parseSchema, cfg.SkipLeadingRows(), cfg.Autodetect)
		if parseErr != nil {
			return ParsedRows{}, 0, 0, parseErr
		}
		merged = mergeParsedChunk(merged, chunk, idx == 0)
	}
	return merged, byteCount, len(expanded), nil
}
// inferSourceFormatFromURIs guesses the source format from URI
// suffixes. It returns "DATASTORE_BACKUP" when any URI ends in
// ".export_metadata" and the empty string otherwise.
func inferSourceFormatFromURIs(uris []string) string {
	const metadataSuffix = ".export_metadata"
	for i := range uris {
		if strings.HasSuffix(uris[i], metadataSuffix) {
			return "DATASTORE_BACKUP"
		}
	}
	return ""
}
// mergeParsedChunk folds chunk into acc. The first chunk replaces the
// accumulator wholesale (so its schema is the one that sticks); later
// chunks contribute only their rows.
func mergeParsedChunk(acc, chunk ParsedRows, first bool) ParsedRows {
	if !first {
		acc.Rows = append(acc.Rows, chunk.Rows...)
		return acc
	}
	return chunk
}
// EnsureDestinationTable applies writeDisposition to the table
// identified by projectID/datasetID/tableID, registering it with
// schema where the disposition calls for that. It wraps the arguments
// in a minimal load configuration and defers to
// applyWriteDisposition.
func EnsureDestinationTable(ctx context.Context, catalog enginepb.CatalogClient,
	projectID, datasetID, tableID, writeDisposition string, schema *enginepb.TableSchema,
) error {
	dest := bqtypes.TableReference{
		ProjectID: projectID,
		DatasetID: datasetID,
		TableID:   tableID,
	}
	cfg := jobs.JobConfigurationLoad{
		DestinationTable: &dest,
		WriteDisposition: writeDisposition,
	}
	return applyWriteDisposition(ctx, catalog, &cfg, projectID, datasetID, tableID, schema)
}
// EnsureDataset asks a catalog applier to make sure
// projectID.datasetID exists, passing "US" as the location argument.
// Only the error is surfaced.
func EnsureDataset(ctx context.Context, catalog enginepb.CatalogClient, projectID, datasetID string) error {
	if _, err := seed.NewCatalogApplier(catalog).EnsureDataset(ctx, projectID, datasetID, "US"); err != nil {
		return err
	}
	return nil
}
// applyWriteDisposition prepares the destination table according to
// cfg.WriteDisposition (an empty value falls back to writeAppend):
//
//   - WRITE_TRUNCATE: drop the table if it exists, then register it.
//   - WRITE_EMPTY: fail if the table exists, otherwise register it.
//   - anything else: register the table only if it does not exist,
//     tolerating an AlreadyExists reply.
func applyWriteDisposition(ctx context.Context, catalog enginepb.CatalogClient,
	cfg *jobs.JobConfigurationLoad, projectID, datasetID, tableID string, schema *enginepb.TableSchema,
) error {
	disposition := cfg.WriteDisposition
	if disposition == "" {
		disposition = writeAppend
	}
	target := &enginepb.TableRef{
		ProjectId: projectID,
		DatasetId: datasetID,
		TableId:   tableID,
	}
	// register issues the RegisterTable call shared by every branch.
	register := func() error {
		_, err := catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{
			Table:  target,
			Schema: schema,
		})
		return err
	}

	present := tableExists(ctx, catalog, target)

	if disposition == "WRITE_TRUNCATE" {
		if present {
			if _, err := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: target}); err != nil {
				return fmt.Errorf("WRITE_TRUNCATE drop table: %w", err)
			}
		}
		return register()
	}
	if disposition == "WRITE_EMPTY" {
		if present {
			return fmt.Errorf("destination table %s.%s.%s is not empty", projectID, datasetID, tableID)
		}
		return register()
	}

	// WRITE_APPEND and CREATE_IF_NEEDED semantics.
	if present {
		return nil
	}
	if err := register(); err != nil && status.Code(err) != codes.AlreadyExists {
		return err
	}
	return nil
}
// tableExists reports whether DescribeTable succeeds for ref. Any
// error, not only a not-found one, is read as "does not exist".
func tableExists(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef) bool {
	if _, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref}); err != nil {
		return false
	}
	return true
}
// SchemaToProto translates a REST TableSchema into its engine proto
// counterpart, converting every field (and nested field) in order. A
// nil schema maps to nil.
func SchemaToProto(s *bqtypes.TableSchema) *enginepb.TableSchema {
	if s == nil {
		return nil
	}
	converted := make([]*enginepb.FieldSchema, len(s.Fields))
	for i := range s.Fields {
		converted[i] = fieldToProto(s.Fields[i])
	}
	return &enginepb.TableSchema{Fields: converted}
}
// fieldToProto copies name, type, mode and description from f into a
// new proto field and converts nested fields recursively. A field
// without children leaves the proto's Fields unset.
func fieldToProto(f bqtypes.TableFieldSchema) *enginepb.FieldSchema {
	node := &enginepb.FieldSchema{
		Name:        f.Name,
		Type:        f.Type,
		Mode:        f.Mode,
		Description: f.Description,
	}
	if n := len(f.Fields); n > 0 {
		node.Fields = make([]*enginepb.FieldSchema, 0, n)
		for i := range f.Fields {
			node.Fields = append(node.Fields, fieldToProto(f.Fields[i]))
		}
	}
	return node
}
// FormatStatistics renders the counters of r as the decimal strings
// jobs.LoadStatistics carries. BadRecords is always reported as "0".
func FormatStatistics(r Result) *jobs.LoadStatistics {
	base10 := func(v int64) string { return strconv.FormatInt(v, 10) }

	stats := new(jobs.LoadStatistics)
	stats.InputFiles = strconv.Itoa(r.InputFiles)
	stats.InputFileBytes = base10(r.InputFileBytes)
	stats.OutputRows = base10(r.OutputRows)
	stats.OutputBytes = base10(r.OutputBytes)
	stats.BadRecords = "0"
	return stats
}
// Package load implements the data plane for BigQuery LOAD jobs:
// fetch source bytes, parse CSV/JSON, and bulk-insert into the engine catalog.
package load
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
)
// FetchSource returns the full contents of a load-job source URI.
//
// Dispatch is by prefix: gs:// goes to fetchGCS, s3:// to fetchS3,
// file:// is read from the local filesystem after stripping the
// scheme, and a bare absolute path is read as-is. Anything else is
// rejected as an unsupported scheme.
func FetchSource(ctx context.Context, uri string) ([]byte, error) {
	if strings.HasPrefix(uri, "gs://") {
		return fetchGCS(ctx, uri)
	}
	if strings.HasPrefix(uri, "s3://") {
		return fetchS3(ctx, uri)
	}
	if local, ok := strings.CutPrefix(uri, "file://"); ok {
		return os.ReadFile(local) //nolint:gosec // LOAD jobs intentionally read caller file:// URIs
	}
	if !filepath.IsAbs(uri) {
		return nil, fmt.Errorf("unsupported sourceUri scheme: %q", uri)
	}
	return os.ReadFile(uri) //nolint:gosec // absolute paths for local load samples
}
// fetchS3 downloads an s3://bucket/key object via a plain HTTP GET
// against the endpoint named by the S3_ENDPOINT environment variable
// (trailing slashes trimmed). It fails when S3_ENDPOINT is unset,
// when the URI lacks a bucket or key, or when the server answers
// with anything other than 200; in the last case up to 512 bytes of
// the response body are included in the error.
func fetchS3(ctx context.Context, s3URI string) ([]byte, error) {
	endpoint := strings.TrimRight(strings.TrimSpace(os.Getenv("S3_ENDPOINT")), "/")
	if endpoint == "" {
		return nil, errors.New("s3:// load sources require S3_ENDPOINT (dev-only); use gs:// or file:// instead")
	}

	bucket, key, found := strings.Cut(strings.TrimPrefix(s3URI, "s3://"), "/")
	if !found || bucket == "" || key == "" {
		return nil, fmt.Errorf("invalid s3:// uri: %q", s3URI)
	}

	mediaURL, err := s3MediaURL(endpoint, bucket, key)
	if err != nil {
		return nil, err
	}
	//nolint:gosec // G704: host/scheme fixed to S3_ENDPOINT; object path from load URI is intentional
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, mediaURL, nil)
	if err != nil {
		return nil, err
	}
	//nolint:gosec // G704: dev-only fetch against operator-configured S3_ENDPOINT
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fetch %s: %w", s3URI, err)
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		// Best-effort excerpt of the error body; a read failure here
		// only shortens the message.
		excerpt, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
		return nil, fmt.Errorf("fetch %s: HTTP %d: %s", s3URI, resp.StatusCode, strings.TrimSpace(string(excerpt)))
	}
	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read %s: %w", s3URI, err)
	}
	return payload, nil
}
// s3MediaURL joins bucket and key onto the operator-configured
// endpoint and returns the resulting URL string. Scheme and host are
// taken from endpoint alone; an endpoint that fails to parse or lacks
// either of them is rejected.
func s3MediaURL(endpoint, bucket, key string) (string, error) {
	parsed, parseErr := url.Parse(endpoint)
	switch {
	case parseErr != nil:
		return "", fmt.Errorf("invalid S3_ENDPOINT %q: %w", endpoint, parseErr)
	case parsed.Scheme == "", parsed.Host == "":
		return "", fmt.Errorf("invalid S3_ENDPOINT %q: scheme and host required", endpoint)
	}
	return parsed.JoinPath(bucket, key).String(), nil
}
// fetchGCS downloads a gs://bucket/object through the storage JSON
// API media endpoint (`/storage/v1/b/{bucket}/o/{object}?alt=media`)
// on the origin returned by storageEmulatorBase. Bucket and object
// are path-escaped. A URI without both a bucket and an object, or a
// non-200 reply, is an error; for the latter up to 512 bytes of the
// body are quoted.
func fetchGCS(ctx context.Context, gsURI string) ([]byte, error) {
	bucket, object, found := strings.Cut(strings.TrimPrefix(gsURI, "gs://"), "/")
	if !found || bucket == "" || object == "" {
		return nil, fmt.Errorf("invalid gs:// uri: %q", gsURI)
	}

	mediaURL := storageEmulatorBase() +
		"/storage/v1/b/" + url.PathEscape(bucket) +
		"/o/" + url.PathEscape(object) + "?alt=media"
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, mediaURL, nil)
	if err != nil {
		return nil, err
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fetch %s: %w", gsURI, err)
	}
	defer func() { _ = resp.Body.Close() }()

	if resp.StatusCode != http.StatusOK {
		// Best-effort excerpt of the error body.
		excerpt, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
		return nil, fmt.Errorf("fetch %s: HTTP %d: %s", gsURI, resp.StatusCode, strings.TrimSpace(string(excerpt)))
	}
	payload, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("read %s: %w", gsURI, err)
	}
	return payload, nil
}
// storageEmulatorBase returns the HTTP origin (scheme, host and port,
// no trailing slash) used for GCS JSON API requests.
//
// The host comes from STORAGE_EMULATOR_HOST after trimming
// whitespace, a leading "http://", "https://" or "//", and any
// trailing slashes; the result is always served over plain http. When
// the variable is unset, or nothing is left after trimming, the host
// defaults to 127.0.0.1. If the host carries no ":" the port is
// taken from FAKE_GCS_PORT, defaulting to 4443.
func storageEmulatorBase() string {
	host := strings.TrimSpace(os.Getenv("STORAGE_EMULATOR_HOST"))
	host = strings.TrimPrefix(host, "http://")
	host = strings.TrimPrefix(host, "https://")
	host = strings.TrimPrefix(host, "//")
	// Callers append "/storage/v1/..." themselves; a trailing slash
	// here would yield "//storage" or "host/:port".
	host = strings.TrimRight(host, "/")
	if host == "" {
		host = "127.0.0.1"
	}
	if strings.Contains(host, ":") {
		// Host already names its port.
		return "http://" + host
	}
	port := os.Getenv("FAKE_GCS_PORT")
	if port == "" {
		port = "4443"
	}
	return "http://" + host + ":" + port
}
package load
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
)
// ExpandSourceURIs returns uris with every entry containing a '*'
// replaced, in place and in order, by the objects it matches
// (resolved through expandWildcardURI). Entries without a wildcard
// are passed through untouched. The first expansion error aborts the
// whole call.
func ExpandSourceURIs(ctx context.Context, uris []string) ([]string, error) {
	var resolved []string
	for _, uri := range uris {
		if strings.Contains(uri, "*") {
			matches, err := expandWildcardURI(ctx, uri)
			if err != nil {
				return nil, err
			}
			resolved = append(resolved, matches...)
			continue
		}
		resolved = append(resolved, uri)
	}
	return resolved, nil
}
// expandWildcardURI resolves a gs:// URI containing '*' into the
// concrete object URIs it matches. Only the first '*' is treated as
// the wildcard: the text before it is used as the listing prefix and
// the text after it as a required suffix. A URI whose object part has
// no '*' is returned as-is. Matching nothing is an error.
func expandWildcardURI(ctx context.Context, gsURI string) ([]string, error) {
	bucket, pattern, err := splitGSURI(gsURI)
	if err != nil {
		return nil, err
	}
	star := strings.IndexByte(pattern, '*')
	if star < 0 {
		return []string{gsURI}, nil
	}
	prefix, suffix := pattern[:star], pattern[star+1:]

	names, err := ListGCSObjects(ctx, bucket, prefix)
	if err != nil {
		return nil, err
	}
	var matches []string
	for _, name := range names {
		if matchesGCSWildcard(name, prefix, suffix) {
			matches = append(matches, "gs://"+bucket+"/"+name)
		}
	}
	if len(matches) == 0 {
		return nil, fmt.Errorf("no objects matched sourceUri %q", gsURI)
	}
	return matches, nil
}
// matchesGCSWildcard reports whether object name fits the pattern
// prefix + '*' + suffix. Names ending in "/" never match, and the
// part covered by the wildcard must be non-empty.
func matchesGCSWildcard(name, prefix, suffix string) bool {
	if strings.HasSuffix(name, "/") {
		return false
	}
	middle, ok := strings.CutPrefix(name, prefix)
	if !ok {
		return false
	}
	if suffix != "" {
		if middle, ok = strings.CutSuffix(middle, suffix); !ok {
			return false
		}
	}
	return len(middle) > 0
}
// ListGCSObjects returns the names of all objects in bucket whose
// name starts with prefix. It pages through the storage JSON API
// list endpoint on storageEmulatorBase (1000 results per request),
// following nextPageToken until it is empty. Items with an empty name
// are skipped. Any transport failure, non-200 status or undecodable
// page aborts the listing.
func ListGCSObjects(ctx context.Context, bucket, prefix string) ([]string, error) {
	type listPage struct {
		Items []struct {
			Name string `json:"name"`
		} `json:"items"`
		NextPageToken string `json:"nextPageToken"`
	}

	endpoint := storageEmulatorBase() + "/storage/v1/b/" + url.PathEscape(bucket) + "/o?"
	var names []string
	for token := ""; ; {
		query := url.Values{}
		query.Set("prefix", prefix)
		query.Set("maxResults", "1000")
		if token != "" {
			query.Set("pageToken", token)
		}

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint+query.Encode(), nil)
		if err != nil {
			return nil, err
		}
		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			return nil, fmt.Errorf("list gs://%s/%s: %w", bucket, prefix, err)
		}
		// Read and close eagerly instead of deferring, since this runs
		// once per page inside the loop.
		raw, readErr := io.ReadAll(resp.Body)
		_ = resp.Body.Close()
		if readErr != nil {
			return nil, readErr
		}
		if resp.StatusCode != http.StatusOK {
			return nil, fmt.Errorf("list gs://%s/%s: HTTP %d: %s",
				bucket, prefix, resp.StatusCode, strings.TrimSpace(string(raw)))
		}

		var page listPage
		if err := json.Unmarshal(raw, &page); err != nil {
			return nil, fmt.Errorf("decode list response: %w", err)
		}
		for _, item := range page.Items {
			if item.Name == "" {
				continue
			}
			names = append(names, item.Name)
		}
		if token = page.NextPageToken; token == "" {
			return names, nil
		}
	}
}
// splitGSURI breaks "gs://bucket/object" into its bucket and object
// parts. The URI must start with "gs://", name a non-empty bucket and
// contain a "/" after it; the object part itself may be empty.
func splitGSURI(gsURI string) (bucket, object string, err error) {
	if rest, hasScheme := strings.CutPrefix(gsURI, "gs://"); hasScheme {
		name, path, hasSlash := strings.Cut(rest, "/")
		if hasSlash && name != "" {
			return name, path, nil
		}
	}
	return "", "", fmt.Errorf("invalid gs:// uri: %q", gsURI)
}
// ObjectPathFromURI extracts the object path (everything after the
// bucket and its following "/") from a gs:// URI, or returns the
// parse error from splitGSURI.
func ObjectPathFromURI(gsURI string) (string, error) {
	_, objectPath, err := splitGSURI(gsURI)
	if err != nil {
		return "", err
	}
	return objectPath, nil
}
// BucketFromURI extracts the bucket name from a gs:// URI, or returns
// the parse error from splitGSURI.
func BucketFromURI(gsURI string) (string, error) {
	name, _, err := splitGSURI(gsURI)
	if err != nil {
		return "", err
	}
	return name, nil
}
package load
import (
	"context"
	"errors"
	"fmt"
	"slices"
	"strings"

	"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
	"github.com/vantaboard/bigquery-emulator/gateway/jobs"
)
const (
// Hive partitioning modes. They are compared against the trimmed,
// upper-cased HivePartitioningOptions.Mode.
hiveModeCustom = "CUSTOM"
hiveModeAuto = "AUTO"
hiveModeStrings = "STRINGS"
// defaultHivePartitionFieldType is the column type given to partition
// keys discovered from object paths rather than declared in a CUSTOM
// prefix template.
defaultHivePartitionFieldType = "STRING"
)
// hivePartitionField describes one partition column: the key name and
// the type string that ends up in the merged table schema.
type hivePartitionField struct {
Name string
Type string
}
// parseHiveURISources adapts a hive-partitioned load configuration to
// the external-table parser: it repackages the relevant cfg fields
// (with parseSchema as the schema) into an ExternalDataConfiguration
// and hands it to parseExternalGCS.
func parseHiveURISources(ctx context.Context, cfg *jobs.JobConfigurationLoad,
	parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
	ext := bqtypes.ExternalDataConfiguration{
		SourceURIs:              cfg.SourceURIs,
		SourceFormat:            cfg.SourceFormat,
		Schema:                  parseSchema,
		Autodetect:              cfg.Autodetect,
		HivePartitioningOptions: cfg.HivePartitioningOptions,
	}
	return parseExternalGCS(ctx, &ext, parseSchema, cfg.SkipLeadingRows())
}
// ParseExternalGCS is the exported entry point to parseExternalGCS:
// it expands cfg.SourceURIs (wildcards included), fetches and parses
// every object, and adds hive partition columns when cfg asks for
// them. It returns the merged rows, the total bytes fetched and the
// number of objects read.
func ParseExternalGCS(
	ctx context.Context,
	cfg *bqtypes.ExternalDataConfiguration,
	schema *bqtypes.TableSchema,
	skipLeading int,
) (ParsedRows, int64, int, error) {
	parsed, totalBytes, inputFiles, err := parseExternalGCS(ctx, cfg, schema, skipLeading)
	return parsed, totalBytes, inputFiles, err
}
// parseExternalGCS expands cfg.SourceURIs, then fetches, parses and
// merges each object in order.
//
// With hive partitioning configured, the partition values found in
// each object's path are written into every row of that object. The
// partition columns come from resolveHivePartitionFields when it
// yields any; otherwise they are derived from the first object that
// produces partition values. They are appended to the merged schema
// at the end.
func parseExternalGCS(
	ctx context.Context,
	cfg *bqtypes.ExternalDataConfiguration,
	schema *bqtypes.TableSchema,
	skipLeading int,
) (ParsedRows, int64, int, error) {
	expanded, err := ExpandSourceURIs(ctx, cfg.SourceURIs)
	if err != nil {
		return ParsedRows{}, 0, 0, err
	}

	hive := cfg.HivePartitioningOptions
	var partitionFields []hivePartitionField
	if hive != nil {
		if partitionFields, err = resolveHivePartitionFields(hive); err != nil {
			return ParsedRows{}, 0, 0, err
		}
	}

	var (
		merged    ParsedRows
		byteCount int64
	)
	for idx, uri := range expanded {
		payload, fetchErr := FetchSource(ctx, uri)
		if fetchErr != nil {
			return ParsedRows{}, 0, 0, fetchErr
		}
		byteCount += int64(len(payload))

		chunk, parseErr := ParseSource(cfg.SourceFormat, payload, schema, skipLeading, cfg.Autodetect)
		if parseErr != nil {
			return ParsedRows{}, 0, 0, parseErr
		}
		if hive != nil {
			values, partErr := extractHivePartitions(uri, hive)
			if partErr != nil {
				return ParsedRows{}, 0, 0, partErr
			}
			applyHivePartitions(chunk.Rows, values)
			if len(partitionFields) == 0 && len(values) > 0 {
				partitionFields = partitionFieldsFromValues(values, hive)
			}
		}
		merged = mergeParsedChunk(merged, chunk, idx == 0)
	}

	if hive != nil && len(partitionFields) > 0 {
		merged.Schema = mergeHiveSchema(merged.Schema, partitionFields)
	}
	return merged, byteCount, len(expanded), nil
}
// applyHivePartitions writes every partition key/value pair into each
// row, overwriting any column that already uses the same key.
func applyHivePartitions(rows []map[string]any, partitionValues map[string]string) {
	for idx := range rows {
		target := rows[idx]
		for name, value := range partitionValues {
			target[name] = value
		}
	}
}
// mergeHiveSchema returns a new schema holding the data fields
// followed by each partition field whose name is not already a data
// field. With no partition fields the input schema is returned
// unchanged (possibly nil). A nil dataSchema is treated as having no
// fields.
func mergeHiveSchema(dataSchema *bqtypes.TableSchema, partitionFields []hivePartitionField) *bqtypes.TableSchema {
	if len(partitionFields) == 0 {
		return dataSchema
	}

	merged := new(bqtypes.TableSchema)
	taken := make(map[string]struct{})
	if dataSchema != nil {
		merged.Fields = append(merged.Fields, dataSchema.Fields...)
		for i := range dataSchema.Fields {
			taken[dataSchema.Fields[i].Name] = struct{}{}
		}
	}
	for _, pf := range partitionFields {
		if _, dup := taken[pf.Name]; !dup {
			merged.Fields = append(merged.Fields, bqtypes.TableFieldSchema{
				Name: pf.Name,
				Type: pf.Type,
			})
		}
	}
	return merged
}
// resolveHivePartitionFields returns the partition columns that can
// be known before any object is read. CUSTOM mode reads them from the
// sourceUriPrefix template. AUTO and STRINGS modes yield none here
// but do require a non-blank sourceUriPrefix. Any other mode is an
// error. Nil options produce (nil, nil).
func resolveHivePartitionFields(opts *bqtypes.HivePartitioningOptions) ([]hivePartitionField, error) {
	if opts == nil {
		return nil, nil
	}
	mode := strings.ToUpper(strings.TrimSpace(opts.Mode))
	if mode == hiveModeCustom {
		_, _, fields, err := parseHiveCustomPrefix(opts.SourceURIPrefix)
		return fields, err
	}
	if mode != hiveModeAuto && mode != hiveModeStrings {
		return nil, fmt.Errorf("unsupported hive partitioning mode %q", opts.Mode)
	}
	if strings.TrimSpace(opts.SourceURIPrefix) == "" {
		return nil, errors.New("hive AUTO/STRINGS mode requires sourceUriPrefix")
	}
	return nil, nil
}
// extractHivePartitions pulls the partition key/value pairs out of
// objectURI using the strategy selected by opts.Mode: the prefix
// template for CUSTOM, key=value path segments for AUTO and STRINGS.
// Unknown modes are rejected; nil options produce (nil, nil).
func extractHivePartitions(objectURI string, opts *bqtypes.HivePartitioningOptions) (map[string]string, error) {
	if opts == nil {
		return nil, nil
	}
	mode := strings.ToUpper(strings.TrimSpace(opts.Mode))
	if mode == hiveModeCustom {
		return extractCustomPartitions(objectURI, opts.SourceURIPrefix)
	}
	if mode == hiveModeAuto || mode == hiveModeStrings {
		return extractAutoPartitions(objectURI, opts.SourceURIPrefix)
	}
	return nil, fmt.Errorf("unsupported hive partitioning mode %q", opts.Mode)
}
// parseHiveCustomPrefix dissects a CUSTOM-mode sourceUriPrefix of the
// form gs://bucket/literal/{name:TYPE}/{name:TYPE}...
//
// It returns the bucket, the literal path text outside the braces
// (pathPrefix) and the declared partition fields in template order.
// One "/" directly after a closing brace is consumed and does not
// become part of pathPrefix. Errors cover a non-gs:// template, a
// missing bucket or path separator, an unclosed "{", and a field
// that is not "name:TYPE" with both parts non-empty.
func parseHiveCustomPrefix(template string) (bucket, pathPrefix string, fields []hivePartitionField, err error) {
	rest, isGS := strings.CutPrefix(template, "gs://")
	if !isGS {
		return "", "", nil, errors.New("sourceUriPrefix must be a gs:// URI")
	}
	slash := strings.IndexByte(rest, '/')
	if slash <= 0 {
		return "", "", nil, errors.New("invalid sourceUriPrefix")
	}
	bucket = rest[:slash]
	remaining := rest[slash+1:]

	var literal strings.Builder
	for len(remaining) > 0 {
		if remaining[0] != '{' {
			literal.WriteByte(remaining[0])
			remaining = remaining[1:]
			continue
		}
		end := strings.IndexByte(remaining, '}')
		if end < 0 {
			return "", "", nil, errors.New("unclosed { in sourceUriPrefix")
		}
		spec := remaining[1:end]
		name, typ, hasColon := strings.Cut(spec, ":")
		if !hasColon || name == "" || typ == "" {
			return "", "", nil, fmt.Errorf("invalid hive field %q in sourceUriPrefix", spec)
		}
		fields = append(fields, hivePartitionField{Name: name, Type: typ})
		// Skip the closing brace and at most one following slash.
		remaining = strings.TrimPrefix(remaining[end+1:], "/")
	}
	return bucket, literal.String(), fields, nil
}
// extractCustomPartitions maps the leading path segments of objectURI
// onto the fields declared in a CUSTOM sourceUriPrefix template.
//
// The object must live in the template's bucket and start with its
// literal path prefix. After the prefix there must be one directory
// segment per declared field, followed by the file name. A segment
// written as key=value must use the declared field name as its key; a
// segment without "=" is taken whole as the value.
func extractCustomPartitions(objectURI, sourceURIPrefix string) (map[string]string, error) {
	bucket, pathPrefix, fields, err := parseHiveCustomPrefix(sourceURIPrefix)
	if err != nil {
		return nil, err
	}
	if len(fields) == 0 {
		return nil, errors.New("CUSTOM hive mode requires partition fields in sourceUriPrefix")
	}

	objBucket, objPath, err := splitGSURI(objectURI)
	if err != nil {
		return nil, err
	}
	if objBucket != bucket {
		return nil, fmt.Errorf("object bucket %q does not match sourceUriPrefix bucket %q", objBucket, bucket)
	}
	remainder, underPrefix := strings.CutPrefix(objPath, pathPrefix)
	if !underPrefix {
		return nil, fmt.Errorf("object %q does not match hive prefix %q", objectURI, sourceURIPrefix)
	}

	segments := strings.Split(remainder, "/")
	if len(segments) <= len(fields) {
		return nil, fmt.Errorf("object %q has too few path segments for hive layout", objectURI)
	}
	// The final segment is the file name, not a partition directory.
	dirs := segments[:len(segments)-1]

	values := make(map[string]string, len(fields))
	for idx := range fields {
		want := fields[idx].Name
		key, value, hasEq := strings.Cut(dirs[idx], "=")
		if !hasEq {
			values[want] = dirs[idx]
			continue
		}
		if key != want {
			return nil, fmt.Errorf("partition segment %q, want key %q", dirs[idx], want)
		}
		values[want] = value
	}
	return values, nil
}
// extractAutoPartitions derives partition values for objectURI from the
// "key=value" directories that follow sourceURIPrefix.
//
// The prefix path is treated as a directory (a trailing "/" is appended when
// missing). Every path segment between the prefix and the final file name
// must be of the form key=value; at least one such segment is required.
func extractAutoPartitions(objectURI, sourceURIPrefix string) (map[string]string, error) {
	root, err := ObjectPathFromURI(sourceURIPrefix)
	if err != nil {
		return nil, err
	}
	if !strings.HasSuffix(root, "/") {
		root += "/"
	}

	objPath, err := ObjectPathFromURI(objectURI)
	if err != nil {
		return nil, err
	}
	if !strings.HasPrefix(objPath, root) {
		return nil, fmt.Errorf("object %q does not match hive prefix %q", objectURI, sourceURIPrefix)
	}

	parts := strings.Split(objPath[len(root):], "/")
	dirCount := len(parts) - 1 // the last element is the file name
	if dirCount < 1 {
		return nil, fmt.Errorf("object %q has no partition segments", objectURI)
	}

	values := make(map[string]string, dirCount)
	for _, dir := range parts[:dirCount] {
		key, val, found := strings.Cut(dir, "=")
		if !found {
			return nil, fmt.Errorf("partition segment %q is not key=value", dir)
		}
		values[key] = val
	}
	return values, nil
}
// partitionFieldsFromValues builds the partition column descriptors for the
// keys present in values.
//
// Column order follows opts.Fields when it is non-empty; names listed there
// that have no entry in values are skipped. When opts is nil or lists no
// fields, every key of values is used in map iteration order, which Go leaves
// unspecified. Every column is typed defaultHivePartitionFieldType.
//
// NOTE(review): the previous version compared opts.Mode against
// hiveModeStrings but then assigned the same default type in both cases, so
// the mode never influenced the result. If another mode is meant to produce a
// different type, that logic still needs to be written.
func partitionFieldsFromValues(
	values map[string]string,
	opts *bqtypes.HivePartitioningOptions,
) []hivePartitionField {
	if len(values) == 0 {
		return nil
	}

	// Tolerate a nil options pointer instead of panicking on dereference.
	var order []string
	if opts != nil {
		order = opts.Fields
	}
	if len(order) == 0 {
		order = make([]string, 0, len(values))
		for name := range values {
			order = append(order, name)
		}
	}

	out := make([]hivePartitionField, 0, len(order))
	for _, name := range order {
		if _, ok := values[name]; !ok {
			continue
		}
		out = append(out, hivePartitionField{Name: name, Type: defaultHivePartitionFieldType})
	}
	return out
}
package load
import "math"
// uint64ToSignedInt64 converts v to int64, saturating at math.MaxInt64 for
// values that do not fit.
func uint64ToSignedInt64(v uint64) int64 {
	const limit = uint64(math.MaxInt64)
	if v <= limit {
		return int64(v)
	}
	return math.MaxInt64
}
// datastorePropMarker returns the byte sequence 0x1a, len(prop), prop...
// that the datastore scanners search for. It returns nil when prop is empty
// or longer than 255 bytes, because the length must fit in a single byte.
func datastorePropMarker(prop string) []byte {
	if prop == "" || len(prop) > 255 {
		return nil
	}
	marker := make([]byte, 0, 2+len(prop))
	marker = append(marker, 0x1a, byte(len(prop))) //nolint:gosec // len(prop) is range-checked to [1,255] above
	return append(marker, prop...)
}
package load
import (
	"bytes"
	"encoding/csv"
	"encoding/json"
	"errors"
	"fmt"
	"slices"
	"strconv"
	"strings"
	"time"

	"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// Schema vocabulary shared by the load parsers in this package.
const (
// Field type names as stored in TableFieldSchema.Type.
fieldTypeString = "STRING"
fieldTypeInteger = "INTEGER"
fieldTypeInt64 = "INT64"
fieldTypeFloat = "FLOAT"
fieldTypeFloat64 = "FLOAT64"
fieldTypeBoolean = "BOOLEAN"
fieldTypeBool = "BOOL"
fieldTypeRecord = "RECORD"
// Field modes as stored in TableFieldSchema.Mode.
fieldModeRequired = "REQUIRED"
fieldModeRepeated = "REPEATED"
// writeAppend is the WRITE_APPEND write disposition string.
writeAppend = "WRITE_APPEND"
)
// ParsedRows is the in-memory row batch produced by a format parser.
type ParsedRows struct {
// Schema describes the columns of Rows. Parsers either pass through the
// schema they were given or fill in one derived from the data.
Schema *bqtypes.TableSchema
// Rows holds one map per decoded record, keyed by column name.
Rows []map[string]any
}
// ParseSource decodes load bytes according to sourceFormat.
//
// format is matched case-insensitively after trimming whitespace; an empty
// format is treated as CSV. skipLeading is only consulted by the CSV parser.
// An unrecognized format yields an error that quotes the value as given.
func ParseSource(format string, data []byte, schema *bqtypes.TableSchema,
	skipLeading int, autodetect bool,
) (ParsedRows, error) {
	normalized := strings.ToUpper(strings.TrimSpace(format))
	if normalized == "" || normalized == "CSV" {
		return parseCSV(data, schema, skipLeading, autodetect)
	}
	switch normalized {
	case "NEWLINE_DELIMITED_JSON":
		return parseNDJSON(data, schema, autodetect)
	case "PARQUET":
		return parseParquet(data, schema, autodetect)
	case "AVRO":
		return parseAvro(data, schema, autodetect)
	case "ORC":
		return parseORC(data, schema, autodetect)
	case sourceFormatDatastoreBackup:
		return parseDatastoreEntityBytes(data, schema)
	}
	return ParsedRows{}, fmt.Errorf("unsupported sourceFormat %q", format)
}
// parseCSV decodes CSV bytes into rows keyed by column name.
//
// The first skipLeading records are dropped; a negative value is treated as
// zero. When fewer records exist than are skipped, the result has no rows and
// carries the caller's schema unchanged.
//
// When schema is nil or has no fields:
//   - autodetect=false is an error;
//   - autodetect=true with skipLeading > 0 uses the last skipped record as
//     the header and infers column types from the data rows;
//   - autodetect=true with skipLeading == 0 names the columns
//     string_field_<i> (sized from the first record) and types them STRING.
//
// Cells are coerced with coerceCSVCell according to the column type; records
// shorter than the schema get nil for the missing columns.
func parseCSV(data []byte, schema *bqtypes.TableSchema, skipLeading int, autodetect bool) (ParsedRows, error) {
	// A negative skip count would make the slice expressions below panic.
	if skipLeading < 0 {
		skipLeading = 0
	}

	r := csv.NewReader(bytes.NewReader(data))
	r.TrimLeadingSpace = true
	all, err := r.ReadAll()
	if err != nil {
		return ParsedRows{}, fmt.Errorf("parse CSV: %w", err)
	}
	if len(all) <= skipLeading {
		return ParsedRows{Schema: schema, Rows: nil}, nil
	}
	// Guaranteed non-empty because len(all) > skipLeading.
	dataRows := all[skipLeading:]

	if schema == nil || len(schema.Fields) == 0 {
		switch {
		case !autodetect:
			return ParsedRows{}, errors.New("load job requires schema or autodetect=true for CSV")
		case skipLeading > 0:
			// The record just before the data is taken to be the header row.
			schema = inferSchemaFromCSVHeader(all[skipLeading-1], dataRows)
		default:
			// No header available: synthesize positional STRING columns.
			fields := make([]bqtypes.TableFieldSchema, len(dataRows[0]))
			for i := range fields {
				fields[i] = bqtypes.TableFieldSchema{
					Name: fmt.Sprintf("string_field_%d", i),
					Type: fieldTypeString,
				}
			}
			schema = &bqtypes.TableSchema{Fields: fields}
		}
	}

	fields := schema.Fields
	out := make([]map[string]any, 0, len(dataRows))
	for _, rec := range dataRows {
		row := make(map[string]any, len(fields))
		for i, f := range fields {
			if i < len(rec) {
				row[f.Name] = coerceCSVCell(rec[i], f.Type)
			} else {
				row[f.Name] = nil
			}
		}
		out = append(out, row)
	}
	return ParsedRows{Schema: schema, Rows: out}, nil
}
// inferSchemaFromCSVHeader builds a schema with one column per header cell.
// Column names are the trimmed header cells; each column's type is inferred
// from the values found at the same position in dataRows.
func inferSchemaFromCSVHeader(header []string, dataRows [][]string) *bqtypes.TableSchema {
	schema := &bqtypes.TableSchema{
		Fields: make([]bqtypes.TableFieldSchema, 0, len(header)),
	}
	for col := range header {
		schema.Fields = append(schema.Fields, bqtypes.TableFieldSchema{
			Name: strings.TrimSpace(header[col]),
			Type: inferCSVColumnType(columnValues(dataRows, col)),
		})
	}
	return schema
}
// columnValues collects the trimmed cell at index col from every row that is
// long enough to have one. Shorter rows contribute nothing.
func columnValues(rows [][]string, col int) []string {
	cells := make([]string, 0, len(rows))
	for i := range rows {
		if len(rows[i]) <= col {
			continue
		}
		cells = append(cells, strings.TrimSpace(rows[i][col]))
	}
	return cells
}
// inferCSVColumnType guesses a column type from its cell values.
//
// It returns INTEGER only when at least one non-empty value exists and every
// non-empty value parses as a base-10 int64. Otherwise it returns STRING. A
// column with no values, or only empty cells, is STRING because nothing in it
// indicates an integer.
func inferCSVColumnType(values []string) string {
	sawValue := false
	for _, v := range values {
		if v == "" {
			// Empty cells carry no type information.
			continue
		}
		if _, err := strconv.ParseInt(v, 10, 64); err != nil {
			return fieldTypeString
		}
		sawValue = true
	}
	if sawValue {
		return fieldTypeInteger
	}
	return fieldTypeString
}
// coerceCSVCell converts one CSV cell to the Go value used for a column of
// the given type.
//
// The cell is trimmed first; an empty cell becomes nil. fieldType is matched
// case-insensitively. Integer columns yield int, float columns float64,
// boolean columns bool, and TIMESTAMP/DATETIME columns a formatted string.
// When the cell cannot be parsed as the requested type, or the type is not
// one of those, the trimmed text is returned unchanged.
func coerceCSVCell(raw string, fieldType string) any {
	cell := strings.TrimSpace(raw)
	if cell == "" {
		return nil
	}

	switch strings.ToUpper(strings.TrimSpace(fieldType)) {
	case fieldTypeInteger, fieldTypeInt64:
		n, err := strconv.ParseInt(cell, 10, 64)
		if err == nil {
			return int(n)
		}
	case fieldTypeFloat, fieldTypeFloat64:
		f, err := strconv.ParseFloat(cell, 64)
		if err == nil {
			return f
		}
	case fieldTypeBoolean, fieldTypeBool:
		lowered := strings.ToLower(cell)
		if lowered == "true" || lowered == "t" || lowered == "1" || lowered == "yes" {
			return true
		}
		if lowered == "false" || lowered == "f" || lowered == "0" || lowered == "no" {
			return false
		}
	case fieldTypeTimestamp:
		formatted, ok := parseCSVDateTime(cell, true)
		if ok {
			return formatted
		}
	case "DATETIME":
		formatted, ok := parseCSVDateTime(cell, false)
		if ok {
			return formatted
		}
	}
	// Unparseable or untyped cells fall back to their text.
	return cell
}
// parseCSVDateTime parses a date-time cell using RFC3339/RFC3339Nano and a
// few space- or T-separated layouts (with or without fractional seconds and
// a literal " UTC" suffix).
//
// The parsed instant is always converted to UTC. With keepZone set the result
// is formatted as RFC3339Nano (carrying the "Z" zone designator); otherwise it
// is formatted as a wall-clock string without any zone suffix. The boolean
// reports whether any layout matched.
func parseCSVDateTime(raw string, keepZone bool) (string, bool) {
	outLayout := "2006-01-02T15:04:05.999999"
	if keepZone {
		outLayout = time.RFC3339Nano
	}

	accepted := [...]string{
		time.RFC3339Nano,
		time.RFC3339,
		"2006-01-02 15:04:05.999999 UTC",
		"2006-01-02 15:04:05 UTC",
		"2006-01-02 15:04:05.999999",
		"2006-01-02 15:04:05",
		"2006-01-02T15:04:05",
	}
	for i := range accepted {
		parsed, err := time.Parse(accepted[i], raw)
		if err != nil {
			continue
		}
		return parsed.UTC().Format(outLayout), true
	}
	return "", false
}
// parseNDJSON decodes newline-delimited JSON, one object per line, into rows.
//
// Blank lines are ignored; any line that fails to decode aborts the parse.
// When no schema is supplied, autodetect must be true, in which case the
// schema is inferred from the decoded rows.
func parseNDJSON(data []byte, schema *bqtypes.TableSchema, autodetect bool) (ParsedRows, error) {
	rawLines := bytes.Split(bytes.TrimSpace(data), []byte("\n"))
	rows := make([]map[string]any, 0, len(rawLines))
	for i := range rawLines {
		if len(bytes.TrimSpace(rawLines[i])) == 0 {
			continue
		}
		var decoded map[string]any
		if err := json.Unmarshal(rawLines[i], &decoded); err != nil {
			return ParsedRows{}, fmt.Errorf("parse JSON line: %w", err)
		}
		rows = append(rows, decoded)
	}

	if schema != nil && len(schema.Fields) > 0 {
		return ParsedRows{Schema: schema, Rows: rows}, nil
	}
	if !autodetect {
		return ParsedRows{}, errors.New("load job requires schema or autodetect=true for JSON")
	}
	return ParsedRows{Schema: inferSchemaFromRows(rows), Rows: rows}, nil
}
// inferSchemaFromRows builds a schema containing one STRING column for every
// distinct key seen in rows.
//
// Columns are ordered by the row in which a key first appears; keys that
// first appear in the same row are ordered alphabetically. Go map iteration
// order is unspecified, so sorting is what makes the resulting column order
// reproducible from one run to the next. An empty rows slice yields an empty
// schema.
func inferSchemaFromRows(rows []map[string]any) *bqtypes.TableSchema {
	if len(rows) == 0 {
		return &bqtypes.TableSchema{}
	}

	seen := make(map[string]struct{})
	fields := make([]bqtypes.TableFieldSchema, 0)
	var fresh []string // keys first seen in the current row; reused across rows
	for _, row := range rows {
		fresh = fresh[:0]
		for name := range row {
			if _, dup := seen[name]; dup {
				continue
			}
			seen[name] = struct{}{}
			fresh = append(fresh, name)
		}
		slices.Sort(fresh)
		for _, name := range fresh {
			fields = append(fields, bqtypes.TableFieldSchema{Name: name, Type: fieldTypeString})
		}
	}
	return &bqtypes.TableSchema{Fields: fields}
}
package load
import (
"bytes"
"encoding/json"
"errors"
"fmt"
goavro "github.com/linkedin/goavro/v2"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// parseAvro decodes an Avro Object Container File into rows.
//
// When the caller supplies no schema and autodetect is false, the schema
// embedded in the file is converted up front and a conversion failure is an
// error. Otherwise the embedded schema is converted after the rows are read,
// and row-based inference is used if that conversion fails.
//
// Each record must decode to a map; values are normalized with
// normalizeAvroRow. A read error that stops the scan loop is reported rather
// than silently returning the rows read so far.
func parseAvro(data []byte, schema *bqtypes.TableSchema, autodetect bool) (ParsedRows, error) {
	ocf, err := goavro.NewOCFReader(bytes.NewReader(data))
	if err != nil {
		return ParsedRows{}, fmt.Errorf("parse Avro OCF: %w", err)
	}
	avroSchema := ocf.Codec().Schema()

	if (schema == nil || len(schema.Fields) == 0) && !autodetect {
		schema, err = avroJSONSchemaToBQ(avroSchema)
		if err != nil {
			return ParsedRows{}, fmt.Errorf("parse Avro schema: %w", err)
		}
	}

	rows := make([]map[string]any, 0)
	for ocf.Scan() {
		rec, readErr := ocf.Read()
		if readErr != nil {
			return ParsedRows{}, fmt.Errorf("read Avro row: %w", readErr)
		}
		row, ok := rec.(map[string]any)
		if !ok {
			return ParsedRows{}, fmt.Errorf("read Avro row: unexpected type %T", rec)
		}
		rows = append(rows, normalizeAvroRow(row))
	}
	// Scan returns false both at end of file and on a block-level error;
	// Err distinguishes the two.
	if scanErr := ocf.Err(); scanErr != nil {
		return ParsedRows{}, fmt.Errorf("read Avro row: %w", scanErr)
	}

	if schema == nil || len(schema.Fields) == 0 {
		schema, err = avroJSONSchemaToBQ(avroSchema)
		if err != nil {
			// Best effort in autodetect mode: fall back to the row keys.
			schema = inferSchemaFromRows(rows)
		}
	}
	return ParsedRows{Schema: schema, Rows: rows}, nil
}
// normalizeAvroRow returns a copy of row with every value passed through
// avroValueToAny. A nil row yields an empty, non-nil map.
func normalizeAvroRow(row map[string]any) map[string]any {
	normalized := make(map[string]any, len(row))
	for key := range row {
		normalized[key] = avroValueToAny(row[key])
	}
	return normalized
}
// avroValueToAny recursively copies maps and slices, converting every []byte
// it encounters into a string. All other values are returned unchanged.
func avroValueToAny(v any) any {
	if raw, isBytes := v.([]byte); isBytes {
		return string(raw)
	}
	if obj, isMap := v.(map[string]any); isMap {
		converted := make(map[string]any, len(obj))
		for key := range obj {
			converted[key] = avroValueToAny(obj[key])
		}
		return converted
	}
	if list, isList := v.([]any); isList {
		converted := make([]any, len(list))
		for idx := range list {
			converted[idx] = avroValueToAny(list[idx])
		}
		return converted
	}
	return v
}
// avroJSONSchemaToBQ decodes an Avro schema given as JSON text and converts
// it to a table schema via avroTypeToBQFields.
func avroJSONSchemaToBQ(schemaJSON string) (*bqtypes.TableSchema, error) {
	var decoded any
	err := json.Unmarshal([]byte(schemaJSON), &decoded)
	if err != nil {
		return nil, fmt.Errorf("decode Avro schema JSON: %w", err)
	}
	columns, err := avroTypeToBQFields(decoded)
	if err != nil {
		return nil, err
	}
	schema := bqtypes.TableSchema{Fields: columns}
	return &schema, nil
}
// avroTypeToBQFields converts a decoded Avro schema node into table fields.
//
// A record node (an object whose "type" is "record") yields one field per
// entry of its "fields" array; each entry must be an object with a non-empty
// "name". Any other node is converted as a single unnamed field.
func avroTypeToBQFields(node any) ([]bqtypes.TableFieldSchema, error) {
	if rec, isObj := node.(map[string]any); isObj {
		if typ, _ := rec["type"].(string); typ == "record" {
			rawFields, ok := rec["fields"].([]any)
			if !ok {
				return nil, errors.New("avro record missing fields")
			}
			out := make([]bqtypes.TableFieldSchema, 0, len(rawFields))
			for _, rf := range rawFields {
				fm, ok := rf.(map[string]any)
				if !ok {
					return nil, errors.New("avro field entry has unexpected shape")
				}
				name, _ := fm["name"].(string)
				if name == "" {
					return nil, errors.New("avro field missing name")
				}
				field, err := avroFieldTypeToBQ(name, fm["type"])
				if err != nil {
					return nil, err
				}
				out = append(out, field)
			}
			return out, nil
		}
	}

	// Non-record schema: wrap the single converted type in a one-element list.
	field, err := avroFieldTypeToBQ("", node)
	if err != nil {
		return nil, err
	}
	return []bqtypes.TableFieldSchema{field}, nil
}
// avroFieldTypeToBQ converts one Avro type node into a table field carrying
// the given name. On failure it returns the zero field and the error.
func avroFieldTypeToBQ(name string, node any) (bqtypes.TableFieldSchema, error) {
	var field bqtypes.TableFieldSchema
	typ, mode, nested, err := avroTypeNode(node)
	if err == nil {
		field = bqtypes.TableFieldSchema{Name: name, Type: typ, Mode: mode, Fields: nested}
	}
	return field, err
}
// avroTypeNode resolves a decoded Avro type node to a column type, mode and
// nested fields.
//
//   - A string is a primitive type name, mapped by avroPrimitiveToBQ.
//   - An array is a union: "null" members are ignored and exactly one other
//     member must remain, which is resolved recursively. The union itself adds
//     no mode, so a nullable member keeps the empty mode.
//   - An object with "type":"array" resolves its "items" and reports the
//     REPEATED mode, keeping the element's nested fields.
//   - An object with "type":"record" becomes a RECORD with converted fields.
//   - An object with any other string "type" is treated as that primitive.
//
// Anything else is an error.
func avroTypeNode(node any) (typ, mode string, nested []bqtypes.TableFieldSchema, err error) {
	switch n := node.(type) {
	case string:
		return avroPrimitiveToBQ(n), "", nil, nil
	case []any:
		var inner any
		for _, item := range n {
			if s, ok := item.(string); ok && s == "null" {
				continue
			}
			if inner != nil {
				// More than one non-null member cannot map to a single column.
				return "", "", nil, fmt.Errorf("unsupported Avro union: %v", n)
			}
			inner = item
		}
		if inner == nil {
			return "", "", nil, errors.New("avro union has no non-null member")
		}
		return avroTypeNode(inner)
	case map[string]any:
		t, ok := n["type"].(string)
		if !ok {
			return "", "", nil, fmt.Errorf("unsupported Avro type map: %v", n)
		}
		switch t {
		case "array":
			elemTyp, _, elemNested, elemErr := avroTypeNode(n["items"])
			if elemErr != nil {
				return "", "", nil, elemErr
			}
			return elemTyp, fieldModeRepeated, elemNested, nil
		case "record":
			fields, recErr := avroTypeToBQFields(n)
			if recErr != nil {
				return "", "", nil, recErr
			}
			return fieldTypeRecord, "", fields, nil
		default:
			return avroPrimitiveToBQ(t), "", nil, nil
		}
	default:
		return "", "", nil, fmt.Errorf("unsupported Avro type node: %T", node)
	}
}
func avroPrimitiveToBQ(avroType string) string {
switch avroType {
case "boolean":
return fieldTypeBoolean
case "int", "long":
return fieldTypeInteger
case "float", "double":
return fieldTypeFloat
case "bytes", "fixed":
return "BYTES"
default:
return fieldTypeString
}
}
package load
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"regexp"
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
)
// datastoreOutputFileRE matches "output-<digits>" names. It is used to pull
// entity file names out of a fetched export_metadata payload.
var datastoreOutputFileRE = regexp.MustCompile(`output-\d+`)
// parseDatastoreBackupSources loads a DATASTORE_BACKUP / Firestore export
// referenced by a *.export_metadata URI, fetches the companion output-* entity
// files, and decodes enough entity properties for the public samples.
//
// Only the first source URI is used. If the fetched metadata itself looks like
// JSON (starts with '{' or '['), it is parsed directly and counted as one
// file. Otherwise every distinct "output-N" name found in the metadata
// ("output-0" when none is found) is fetched from the metadata's directory.
// Files that cannot be fetched are skipped, but a file that cannot be parsed
// aborts the load. The return values are the merged rows, the total number of
// bytes fetched, and the number of files read.
func parseDatastoreBackupSources(ctx context.Context, cfg *jobs.JobConfigurationLoad,
	parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
	if cfg == nil || len(cfg.SourceURIs) == 0 {
		return ParsedRows{}, 0, 0, errors.New("DATASTORE_BACKUP requires sourceUris")
	}
	metaURI := cfg.SourceURIs[0]
	metaBytes, err := FetchSource(ctx, metaURI)
	if err != nil {
		return ParsedRows{}, 0, 0, err
	}
	totalBytes := int64(len(metaBytes))

	if meta := bytes.TrimSpace(metaBytes); len(meta) > 0 {
		switch meta[0] {
		case '{', '[':
			rows, jsonErr := parseDatastoreJSON(meta, parseSchema)
			return rows, totalBytes, 1, jsonErr
		}
	}

	outputs := uniqueStrings(datastoreOutputFileRE.FindAllString(string(metaBytes), -1))
	if len(outputs) == 0 {
		outputs = []string{"output-0"}
	}
	baseDir := datastoreBackupBaseDir(metaURI)

	var (
		merged    ParsedRows
		filesRead int
	)
	for _, name := range outputs {
		data, fetchErr := FetchSource(ctx, baseDir+name)
		if fetchErr != nil {
			// Best effort: a missing output file is skipped.
			continue
		}
		totalBytes += int64(len(data))
		filesRead++
		chunk, parseErr := parseDatastoreEntityBytes(data, parseSchema)
		if parseErr != nil {
			return ParsedRows{}, 0, 0, parseErr
		}
		merged = mergeParsedChunk(merged, chunk, filesRead == 1)
	}
	if filesRead == 0 {
		return ParsedRows{}, 0, 0, fmt.Errorf("DATASTORE_BACKUP: no output files found for %q", metaURI)
	}
	return merged, totalBytes, filesRead, nil
}
// datastoreBackupBaseDir returns uri up to and including its last "/", or
// the empty string when uri contains no "/".
func datastoreBackupBaseDir(uri string) string {
	slash := strings.LastIndex(uri, "/")
	if slash < 0 {
		return ""
	}
	return uri[:slash+1]
}
// uniqueStrings returns the distinct non-empty strings of in, keeping the
// order of first occurrence. An empty input yields nil.
func uniqueStrings(in []string) []string {
	if len(in) == 0 {
		return nil
	}
	kept := make([]string, 0, len(in))
	known := make(map[string]bool, len(in))
	for _, candidate := range in {
		if candidate == "" || known[candidate] {
			continue
		}
		known[candidate] = true
		kept = append(kept, candidate)
	}
	return kept
}
// parseDatastoreJSON decodes a JSON rendering of a Datastore backup.
//
// Three shapes are handled: a top-level array of row objects (used as-is), an
// object with an "entities" array (each object entry is flattened; non-object
// entries are dropped), and a single entity object (flattened into one row).
func parseDatastoreJSON(data []byte, schema *bqtypes.TableSchema) (ParsedRows, error) {
	isArray := len(data) > 0 && data[0] == '['
	if isArray {
		var rows []map[string]any
		if err := json.Unmarshal(data, &rows); err != nil {
			return ParsedRows{}, fmt.Errorf("parse DATASTORE_BACKUP JSON array: %w", err)
		}
		return finalizeDatastoreRows(rows, schema), nil
	}

	var doc map[string]any
	if err := json.Unmarshal(data, &doc); err != nil {
		return ParsedRows{}, fmt.Errorf("parse DATASTORE_BACKUP JSON: %w", err)
	}

	entities, isList := doc["entities"].([]any)
	if !isList {
		single := []map[string]any{flattenDatastoreEntity(doc)}
		return finalizeDatastoreRows(single, schema), nil
	}

	rows := make([]map[string]any, 0, len(entities))
	for i := range entities {
		ent, isObj := entities[i].(map[string]any)
		if !isObj {
			continue
		}
		rows = append(rows, flattenDatastoreEntity(ent))
	}
	return finalizeDatastoreRows(rows, schema), nil
}
// flattenDatastoreEntity turns an entity's "properties" object into a flat
// row by unwrapping each property value. Entities without a "properties"
// object are returned unchanged.
func flattenDatastoreEntity(ent map[string]any) map[string]any {
	props, hasProps := ent["properties"].(map[string]any)
	if !hasProps {
		return ent
	}
	flat := make(map[string]any, len(props))
	for name, wrapped := range props {
		flat[name] = unwrapDatastoreValue(wrapped)
	}
	return flat
}
// unwrapDatastoreValue extracts the payload from a typed value wrapper such
// as {"stringValue": "x"}.
//
// The wrapper keys are probed in a fixed order and the first one present
// wins; an integerValue payload is additionally converted by
// unwrapDatastoreInteger. Values that are not objects, or objects with none
// of the known keys, are returned unchanged.
func unwrapDatastoreValue(v any) any {
	wrapper, isWrapper := v.(map[string]any)
	if !isWrapper {
		return v
	}
	valueKeys := [...]string{
		"stringValue", "integerValue", "doubleValue", "booleanValue",
		"timestampValue", "nullValue",
	}
	for i := range valueKeys {
		inner, present := wrapper[valueKeys[i]]
		switch {
		case !present:
			continue
		case valueKeys[i] == "integerValue":
			return unwrapDatastoreInteger(inner)
		default:
			return inner
		}
	}
	return v
}
// unwrapDatastoreInteger converts a base-10 integer given as a string into
// an int. Non-string inputs and strings that do not parse as int64 are
// returned unchanged.
func unwrapDatastoreInteger(raw any) any {
	if text, isString := raw.(string); isString {
		if n, err := strconv.ParseInt(text, 10, 64); err == nil {
			return int(n)
		}
	}
	return raw
}
// parseDatastoreEntityBytes decodes one entity file. Payloads that start with
// '{' or '[' after trimming whitespace are parsed as JSON. Anything else is
// scanned with scanDatastoreEntities.
func parseDatastoreEntityBytes(data []byte, schema *bqtypes.TableSchema) (ParsedRows, error) {
	if payload := bytes.TrimSpace(data); len(payload) > 0 {
		if first := payload[0]; first == '{' || first == '[' {
			return parseDatastoreJSON(payload, schema)
		}
	}
	return finalizeDatastoreRows(scanDatastoreEntities(data), schema), nil
}
// finalizeDatastoreRows pairs rows with schema, inferring a schema from the
// rows when the supplied one is nil or has no fields.
func finalizeDatastoreRows(rows []map[string]any, schema *bqtypes.TableSchema) ParsedRows {
	result := ParsedRows{Schema: schema, Rows: rows}
	if schema == nil || len(schema.Fields) == 0 {
		result.Schema = inferSchemaFromRows(rows)
	}
	return result
}
// scanDatastoreEntities heuristically extracts Firestore/Datastore entity
// properties from LevelDB-encoded export output files. Enough for the public
// us-states backup sample (name, post_abbr, year).
//
// Starting at the current offset it looks for a "name" string property, then
// a "post_abbr" string property after it, then a "year" varint property after
// that. A row is emitted only when all three lookups succeed and name is
// non-empty; an empty post_abbr or a zero year is left out of the row.
func scanDatastoreEntities(data []byte) []map[string]any {
var rows []map[string]any
// The loop has no post statement: i is advanced explicitly below.
for i := 0; i < len(data); {
name, next := readDatastoreStringProp(data, i, "name")
if next < 0 {
// No readable "name" property at or after i: stop scanning.
break
}
abbr, next := readDatastoreStringProp(data, next, "post_abbr")
if next < 0 {
// Incomplete triple: retry the whole lookup one byte further on.
i++
continue
}
year, next := readDatastoreVarintProp(data, next, "year")
if next < 0 {
// Incomplete triple: retry the whole lookup one byte further on.
i++
continue
}
if name != "" {
// NOTE(review): datastorePropName is declared outside this block;
// presumably it equals "name" — confirm.
row := map[string]any{datastorePropName: name}
if abbr != "" {
row["post_abbr"] = abbr
}
if year != 0 {
row["year"] = year
}
rows = append(rows, row)
}
// Resume after the end of the "year" value just consumed.
i = next
}
return rows
}
// readDatastoreStringProp finds the first marker for prop at or after start
// and reads the string value that follows it.
//
// After the marker, any run of space (0x20) or NUL bytes is skipped. The next
// byte must be 0x1a, followed by a one-byte length and that many bytes of
// value. It returns the value and the offset just past it, or ("", -1) when
// the marker is absent or the bytes after it do not have that shape.
func readDatastoreStringProp(data []byte, start int, prop string) (string, int) {
	const notFound = -1

	marker := datastorePropMarker(prop)
	if marker == nil {
		return "", notFound
	}
	offset := bytes.Index(data[start:], marker)
	if offset < 0 {
		return "", notFound
	}

	cursor := start + offset + len(marker)
	for ; cursor < len(data); cursor++ {
		if c := data[cursor]; c != ' ' && c != 0 {
			break
		}
	}
	// Need the 0x1a tag byte followed by a length byte.
	if cursor >= len(data) || data[cursor] != 0x1a {
		return "", notFound
	}
	if cursor+1 >= len(data) {
		return "", notFound
	}

	begin := cursor + 2
	end := begin + int(data[cursor+1])
	if end > len(data) {
		return "", notFound
	}
	return string(data[begin:end]), end
}
// readDatastoreVarintProp finds the first marker for prop at or after start
// and reads the varint value that follows it.
//
// After the marker, any run of space (0x20) or NUL bytes is skipped. The next
// byte must be 0x08, followed by a base-128 varint (7 payload bits per byte,
// least-significant group first, high bit set on every byte but the last).
// It returns the value, saturated to int64, and the offset just past it, or
// (0, -1) when the marker is absent, the tag byte is wrong, or the data ends
// before the varint terminates.
func readDatastoreVarintProp(data []byte, start int, prop string) (int64, int) {
	const notFound = -1

	marker := datastorePropMarker(prop)
	if marker == nil {
		return 0, notFound
	}
	offset := bytes.Index(data[start:], marker)
	if offset < 0 {
		return 0, notFound
	}

	cursor := start + offset + len(marker)
	for ; cursor < len(data); cursor++ {
		if c := data[cursor]; c != ' ' && c != 0 {
			break
		}
	}
	if cursor >= len(data) || data[cursor] != 0x08 {
		return 0, notFound
	}
	cursor++

	var value uint64
	for shift := 0; cursor < len(data); shift += 7 {
		b := data[cursor]
		cursor++
		value |= uint64(b&0x7f) << shift
		if b < 0x80 {
			// High bit clear: this was the final varint byte.
			return uint64ToSignedInt64(value), cursor
		}
	}
	return 0, notFound
}
package load
import (
"bytes"
"errors"
"fmt"
"strings"
"github.com/scritchley/orc"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// parseORC decodes an in-memory ORC file into rows keyed by column name.
//
// When the caller supplies no schema, one is derived from the file's type
// description (before reading when autodetect is false, after reading
// otherwise), falling back to inferSchemaFromRows when that yields no fields.
// A panic raised anywhere in the function is recovered and reported through
// the returned error.
func parseORC(data []byte, schema *bqtypes.TableSchema, autodetect bool) (parsed ParsedRows, err error) {
// Turn panics into a regular error by assigning the named result.
defer func() {
if r := recover(); r != nil {
err = fmt.Errorf("parse ORC: %v", r)
}
}()
// Reject input that does not start with the "ORC" magic bytes.
if len(data) < 3 || string(data[:3]) != "ORC" {
return ParsedRows{}, errors.New("parse ORC: invalid ORC file header")
}
r, err := orc.NewReader(bytes.NewReader(data))
if err != nil {
return ParsedRows{}, fmt.Errorf("parse ORC: %w", err)
}
// The close error is deliberately ignored.
defer func() { _ = r.Close() }()
orcSchema := r.Schema()
cols := orcSchema.Columns()
if len(cols) == 0 {
return ParsedRows{}, errors.New("parse ORC: empty schema")
}
if schema == nil || len(schema.Fields) == 0 {
if !autodetect {
schema = orcSchemaToBQ(orcSchema)
}
}
// Select every column and walk the file stripe by stripe.
cur := r.Select(cols...)
rows := make([]map[string]any, 0)
for cur.Stripes() {
for cur.Next() {
vals := cur.Row()
row := make(map[string]any, len(cols))
for i, col := range cols {
if i < len(vals) {
row[col] = orcValueToAny(vals[i])
} else {
// Fewer values than selected columns: record the column as nil.
row[col] = nil
}
}
rows = append(rows, row)
}
}
// NOTE(review): the cursor's error state is not consulted after iteration,
// so a read error that ends the loops early would go unnoticed — confirm
// against the orc cursor API whether an Err() check belongs here.
if schema == nil || len(schema.Fields) == 0 {
schema = orcSchemaToBQ(orcSchema)
if schema == nil || len(schema.Fields) == 0 {
schema = inferSchemaFromRows(rows)
}
}
return ParsedRows{Schema: schema, Rows: rows}, nil
}
// orcSchemaToBQ converts an ORC type description into a table schema. A nil
// description yields an empty schema.
func orcSchemaToBQ(td *orc.TypeDescription) *bqtypes.TableSchema {
	schema := &bqtypes.TableSchema{}
	if td != nil {
		schema.Fields = orcFieldsToBQ(td)
	}
	return schema
}
// orcFieldsToBQ converts each column of td into a table field. Columns whose
// type description cannot be looked up are skipped.
func orcFieldsToBQ(td *orc.TypeDescription) []bqtypes.TableFieldSchema {
	names := td.Columns()
	fields := make([]bqtypes.TableFieldSchema, 0, len(names))
	for i := range names {
		child, err := td.GetField(names[i])
		if err == nil {
			fields = append(fields, orcFieldToBQ(names[i], child))
		}
	}
	return fields
}
// orcFieldToBQ builds the table field for one ORC column by interpreting the
// textual form of its type description.
func orcFieldToBQ(name string, td *orc.TypeDescription) bqtypes.TableFieldSchema {
	field := bqtypes.TableFieldSchema{Name: name}
	field.Type, field.Mode, field.Fields = orcTypeStringToBQ(td.String())
	return field
}
// orcTypeStringToBQ maps a textual ORC type to a column type, mode and
// nested fields.
//
//   - "struct<...>" becomes a RECORD whose fields are parsed from the body.
//   - "array<T>" takes the type and nested fields of T with the REPEATED
//     mode, so an array of structs keeps its field list.
//   - Scalar names are mapped by the switch below; anything unrecognized
//     (including names carrying parameters) is reported as STRING.
func orcTypeStringToBQ(typeStr string) (typ, mode string, nested []bqtypes.TableFieldSchema) {
	typeStr = strings.TrimSpace(typeStr)
	if strings.HasPrefix(typeStr, "struct<") && strings.HasSuffix(typeStr, ">") {
		inner := strings.TrimSuffix(strings.TrimPrefix(typeStr, "struct<"), ">")
		return fieldTypeRecord, "", parseORCStructFields(inner)
	}
	if strings.HasPrefix(typeStr, "array<") && strings.HasSuffix(typeStr, ">") {
		inner := strings.TrimSuffix(strings.TrimPrefix(typeStr, "array<"), ">")
		// Propagate the element's nested fields; dropping them would leave a
		// repeated RECORD without any field definitions.
		elemTyp, _, elemNested := orcTypeStringToBQ(inner)
		return elemTyp, fieldModeRepeated, elemNested
	}
	switch typeStr {
	case "boolean":
		return fieldTypeBoolean, "", nil
	case "tinyint", "smallint", "int", "bigint":
		return fieldTypeInteger, "", nil
	case "float", "double":
		return fieldTypeFloat, "", nil
	case "string", "varchar", "char":
		return fieldTypeString, "", nil
	case "binary":
		return "BYTES", "", nil
	case "date", "timestamp":
		return fieldTypeTimestamp, "", nil
	case "decimal":
		return "NUMERIC", "", nil
	default:
		return fieldTypeString, "", nil
	}
}
// parseORCStructFields parses the body of a "struct<...>" type, a
// comma-separated list of "name:type" declarations, into table fields.
// Declarations without a colon, or with nothing before it, are skipped.
func parseORCStructFields(inner string) []bqtypes.TableFieldSchema {
	decls := splitORCStructFields(inner)
	fields := make([]bqtypes.TableFieldSchema, 0, len(decls))
	for _, decl := range decls {
		sep := strings.Index(decl, ":")
		if sep < 1 {
			continue
		}
		var field bqtypes.TableFieldSchema
		field.Name = strings.TrimSpace(decl[:sep])
		field.Type, field.Mode, field.Fields = orcTypeStringToBQ(strings.TrimSpace(decl[sep+1:]))
		fields = append(fields, field)
	}
	return fields
}
// splitORCStructFields splits the body of a "struct<...>" type into its
// top-level "name:type" declarations.
//
// Only commas at nesting depth zero separate declarations. Both angle
// brackets (nested struct/array/map types) and parentheses (type parameters
// such as decimal(10,2)) count as nesting, so a parameterized type is not cut
// in half. An empty body yields nil; a trailing comma does not produce an
// empty final element.
func splitORCStructFields(inner string) []string {
	var parts []string
	depth, start := 0, 0
	for i, ch := range inner {
		switch ch {
		case '<', '(':
			depth++
		case '>', ')':
			depth--
		case ',':
			if depth == 0 {
				parts = append(parts, inner[start:i])
				start = i + 1
			}
		}
	}
	if start < len(inner) {
		parts = append(parts, inner[start:])
	}
	return parts
}
// orcValueToAny recursively copies maps and slices, converting every []byte
// it encounters into a string. All other values are returned unchanged.
func orcValueToAny(v any) any {
	if raw, isBytes := v.([]byte); isBytes {
		return string(raw)
	}
	if obj, isMap := v.(map[string]any); isMap {
		converted := make(map[string]any, len(obj))
		for key := range obj {
			converted[key] = orcValueToAny(obj[key])
		}
		return converted
	}
	if list, isList := v.([]any); isList {
		converted := make([]any, len(list))
		for idx := range list {
			converted[idx] = orcValueToAny(list[idx])
		}
		return converted
	}
	return v
}
package load
import (
"bytes"
"errors"
"fmt"
"io"
"time"
"github.com/parquet-go/parquet-go"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// parseParquet decodes an in-memory Parquet file into rows.
//
// When the caller supplies no schema and autodetect is false, the schema is
// taken from the Parquet file up front. Otherwise it is inferred from the
// decoded rows after reading; if that inference produces no fields (for
// example because the file holds no rows), the Parquet file schema is used
// instead. Row values are normalized with normalizeParquetRow.
//
// NOTE(review): in autodetect mode row inference types every column as
// STRING, whereas the file schema carries real types. The order is kept as it
// was; confirm whether the file schema should take precedence.
func parseParquet(data []byte, schema *bqtypes.TableSchema, autodetect bool) (ParsedRows, error) {
	f, err := parquet.OpenFile(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return ParsedRows{}, fmt.Errorf("parse Parquet: %w", err)
	}
	if (schema == nil || len(schema.Fields) == 0) && !autodetect {
		schema = parquetFileSchemaToBQ(f.Schema())
	}

	reader := parquet.NewReader(f)
	defer func() { _ = reader.Close() }()

	rows := make([]map[string]any, 0, f.NumRows())
	for {
		row := make(map[string]any)
		if err := reader.Read(&row); err != nil {
			if errors.Is(err, io.EOF) {
				break
			}
			return ParsedRows{}, fmt.Errorf("read Parquet rows: %w", err)
		}
		rows = append(rows, normalizeParquetRow(row))
	}

	if schema == nil || len(schema.Fields) == 0 {
		schema = inferSchemaFromRows(rows)
		// inferSchemaFromRows never returns nil, so test for "no fields" to
		// make the file-schema fallback reachable.
		if schema == nil || len(schema.Fields) == 0 {
			schema = parquetFileSchemaToBQ(f.Schema())
		}
	}
	return ParsedRows{Schema: schema, Rows: rows}, nil
}
// normalizeParquetRow returns a copy of row with every value passed through
// parquetValueToAny. A nil row yields an empty, non-nil map.
func normalizeParquetRow(row map[string]any) map[string]any {
	normalized := make(map[string]any, len(row))
	for key := range row {
		normalized[key] = parquetValueToAny(row[key])
	}
	return normalized
}
// parquetValueToAny normalizes a decoded Parquet value.
//
// time.Time values (and non-nil *time.Time) become RFC3339Nano strings in
// UTC; a nil *time.Time becomes nil. Maps and slices are copied recursively.
// An int64 strictly between 1e12 and 1e17 is assumed to be microseconds since
// the Unix epoch (Pandas/pyarrow often store TIMESTAMP columns that way) and
// is formatted like a time. Every other value is returned unchanged.
func parquetValueToAny(v any) any {
	const (
		minMicros = 1_000_000_000_000
		maxMicros = 100_000_000_000_000_000
	)
	switch val := v.(type) {
	case *time.Time:
		if val == nil {
			return nil
		}
		return val.UTC().Format(time.RFC3339Nano)
	case time.Time:
		return val.UTC().Format(time.RFC3339Nano)
	case int64:
		if val <= minMicros || val >= maxMicros {
			return val
		}
		return time.UnixMicro(val).UTC().Format(time.RFC3339Nano)
	case []any:
		converted := make([]any, len(val))
		for idx := range val {
			converted[idx] = parquetValueToAny(val[idx])
		}
		return converted
	case map[string]any:
		converted := make(map[string]any, len(val))
		for key := range val {
			converted[key] = parquetValueToAny(val[key])
		}
		return converted
	}
	return v
}
// parquetFileSchemaToBQ converts the top-level fields of a Parquet schema
// into a table schema. A nil schema yields an empty table schema.
func parquetFileSchemaToBQ(s *parquet.Schema) *bqtypes.TableSchema {
	schema := &bqtypes.TableSchema{}
	if s == nil {
		return schema
	}
	columns := s.Fields()
	schema.Fields = make([]bqtypes.TableFieldSchema, 0, len(columns))
	for i := range columns {
		schema.Fields = append(schema.Fields, parquetFieldToBQ(columns[i]))
	}
	return schema
}
// parquetFieldToBQ converts one Parquet field into a table field.
//
// The mode is REPEATED for repeated fields, REQUIRED for required ones
// (repeated wins when both report true) and empty otherwise. Child fields are
// converted recursively only for RECORD-typed fields that have children.
func parquetFieldToBQ(f parquet.Field) bqtypes.TableFieldSchema {
	field := bqtypes.TableFieldSchema{Name: f.Name(), Type: parquetNodeTypeToBQ(f)}
	switch {
	case f.Repeated():
		field.Mode = fieldModeRepeated
	case f.Required():
		field.Mode = fieldModeRequired
	}

	if field.Type != fieldTypeRecord {
		return field
	}
	children := f.Fields()
	if len(children) == 0 {
		return field
	}
	field.Fields = make([]bqtypes.TableFieldSchema, 0, len(children))
	for i := range children {
		field.Fields = append(field.Fields, parquetFieldToBQ(children[i]))
	}
	return field
}
// parquetNodeTypeToBQ picks the column type name for a Parquet field.
//
// Fields with children are RECORD. Otherwise the textual form of the field's
// type is matched against the known names below; when none matches, a logical
// type whose textual form equals the timestamp type name yields TIMESTAMP,
// and everything else is STRING.
func parquetNodeTypeToBQ(f parquet.Field) string {
	if len(f.Fields()) > 0 {
		return fieldTypeRecord
	}

	typeName := f.Type().String()
	switch typeName {
	case fieldTypeBoolean:
		return fieldTypeBoolean
	case "INT32", "INT64", "UINT32", "UINT64":
		return fieldTypeInteger
	case "INT96", fieldTypeTimestamp:
		return fieldTypeTimestamp
	case "FLOAT", "DOUBLE":
		return fieldTypeFloat
	case "BYTE_ARRAY", "FIXED_LEN_BYTE_ARRAY":
		return fieldTypeString
	}

	logical := f.Type().LogicalType()
	if logical != nil && logical.String() == fieldTypeTimestamp {
		return fieldTypeTimestamp
	}
	return fieldTypeString
}
package load
import (
"bytes"
"context"
"fmt"
"io"
"net/http"
"net/url"
"strings"
)
// PutGCS uploads object bytes to fake-gcs or the JSON API media endpoint.
//
// gsURI names the destination bucket and object. An empty contentType is sent
// as application/octet-stream. The upload is a single media POST against the
// base URL returned by storageEmulatorBase; any response status other than
// 200 or 201 is an error that includes up to 512 bytes of the response body.
func PutGCS(ctx context.Context, gsURI string, contentType string, data []byte) error {
	bucket, object, err := parseGSURI(gsURI)
	if err != nil {
		return err
	}
	if contentType == "" {
		contentType = "application/octet-stream"
	}

	endpoint := fmt.Sprintf("%s/upload/storage/v1/b/%s/o?uploadType=media&name=%s",
		storageEmulatorBase(), url.PathEscape(bucket), url.QueryEscape(object))
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(data))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", contentType)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("put %s: %w", gsURI, err)
	}
	defer func() { _ = resp.Body.Close() }()

	switch resp.StatusCode {
	case http.StatusOK, http.StatusCreated:
		return nil
	}
	// Include a bounded excerpt of the body to aid debugging.
	excerpt, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
	return fmt.Errorf("put %s: HTTP %d: %s", gsURI, resp.StatusCode, strings.TrimSpace(string(excerpt)))
}
// parseGSURI splits a "gs://bucket/object" URI into its bucket and object
// name. The object name is everything after the first "/" following the
// bucket and may itself contain slashes.
//
// It returns an error when the URI does not start with "gs://" or when the
// bucket or object part is empty.
func parseGSURI(gsURI string) (bucket, object string, err error) {
	const scheme = "gs://"
	// Reject other schemes outright; merely trimming the prefix would let
	// "http://bucket/obj" through with "http:" as the bucket.
	if !strings.HasPrefix(gsURI, scheme) {
		return "", "", fmt.Errorf("invalid gs:// uri: %q", gsURI)
	}
	bucket, object, found := strings.Cut(gsURI[len(scheme):], "/")
	if !found || bucket == "" || object == "" {
		return "", "", fmt.Errorf("invalid gs:// uri: %q", gsURI)
	}
	return bucket, object, nil
}
package load
import (
"context"
"fmt"
"slices"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
)
// schemaUpdateOptions values referenced by this file.
const (
// schemaUpdateAllowFieldAddition is the ALLOW_FIELD_ADDITION option string.
schemaUpdateAllowFieldAddition = "ALLOW_FIELD_ADDITION"
// schemaUpdateAllowFieldRelaxation is the ALLOW_FIELD_RELAXATION option string.
schemaUpdateAllowFieldRelaxation = "ALLOW_FIELD_RELAXATION"
)
// TablePatchSchemaOptions are the schemaUpdateOptions honored by
// tables.patch when syncing schema changes to the engine catalog.
// It lists both field addition and field relaxation.
var TablePatchSchemaOptions = []string{
schemaUpdateAllowFieldAddition,
schemaUpdateAllowFieldRelaxation,
}
// existingDestinationSchema returns the catalog schema for a destination
// table when the load job omits an explicit schema and autodetect=false.
//
// It returns nil when the table does not exist or when describing it fails;
// the describe error itself is not reported.
func existingDestinationSchema(ctx context.Context, catalog enginepb.CatalogClient,
	projectID, datasetID, tableID string,
) *bqtypes.TableSchema {
	ref := &enginepb.TableRef{
		ProjectId: projectID,
		DatasetId: datasetID,
		TableId:   tableID,
	}
	if tableExists(ctx, catalog, ref) {
		desc, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref})
		if err == nil {
			return schemaFromProto(desc.GetSchema())
		}
	}
	return nil
}
// resolveDestinationSchema merges load-time schema updates into the destination
// table schema for WRITE_APPEND jobs. When the merged schema differs from the
// engine catalog, existing rows are preserved via drop-and-recreate.
//
// The load schema is returned unchanged (converted to proto) when the write
// disposition is not WRITE_APPEND, when no schemaUpdateOptions are set, or
// when the destination table does not exist yet. An empty write disposition
// is treated as WRITE_APPEND.
func resolveDestinationSchema(ctx context.Context, catalog enginepb.CatalogClient,
cfg *jobs.JobConfigurationLoad, projectID, datasetID, tableID string,
loadSchema *bqtypes.TableSchema,
) (*enginepb.TableSchema, error) {
wd := cfg.WriteDisposition
if wd == "" {
wd = writeAppend
}
// Schema merging only applies to appends that request schema updates.
if wd != writeAppend || len(cfg.SchemaUpdateOptions) == 0 {
return SchemaToProto(loadSchema), nil
}
tableRef := &enginepb.TableRef{
ProjectId: projectID,
DatasetId: datasetID,
TableId: tableID,
}
if !tableExists(ctx, catalog, tableRef) {
return SchemaToProto(loadSchema), nil
}
desc, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
if err != nil {
return nil, fmt.Errorf("describe destination table: %w", err)
}
existing := schemaFromProto(desc.GetSchema())
// Prefer the schema given in the job configuration; fall back to the
// schema that was resolved for the load.
explicit := cfg.Schema
if explicit == nil || len(explicit.Fields) == 0 {
explicit = loadSchema
}
merged, changed, err := mergeSchemas(existing, explicit, cfg.SchemaUpdateOptions, false)
if err != nil {
return nil, err
}
if !changed {
return SchemaToProto(existing), nil
}
// Read the current rows before the table is dropped so they can be
// written back under the merged schema.
preserved, err := listAllRows(ctx, catalog, tableRef, desc.GetSchema())
if err != nil {
return nil, err
}
protoMerged := SchemaToProto(merged)
// NOTE(review): drop, register and re-insert are separate calls; a failure
// after the drop leaves the table missing or without its previous rows.
if _, err := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: tableRef}); err != nil {
return nil, fmt.Errorf("schema update drop table: %w", err)
}
if _, err := catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{
Table: tableRef,
Schema: protoMerged,
}); err != nil {
return nil, fmt.Errorf("schema update register table: %w", err)
}
if len(preserved) > 0 {
ref := seed.TableRef{ProjectID: projectID, DatasetID: datasetID, TableID: tableID}
applier := seed.NewCatalogApplier(catalog)
if _, err := applier.InsertRows(ctx, ref, protoMerged, preserved); err != nil {
return nil, fmt.Errorf("schema update re-insert rows: %w", err)
}
}
return protoMerged, nil
}
// MergeSchemasForAppend merges an existing table schema with a query
// result schema honoring BigQuery schemaUpdateOptions.
//
// It returns the merged schema and whether it differs from existing. When the
// merge fails, the existing schema is returned with changed=false and the
// error is discarded.
func MergeSchemasForAppend(
	existing *bqtypes.TableSchema,
	query *bqtypes.TableSchema,
	opts []string,
) (*bqtypes.TableSchema, bool) {
	if merged, changed, err := mergeSchemas(existing, query, opts, false); err == nil {
		return merged, changed
	}
	return existing, false
}
// MergeSchemasForTablePatch folds the schema from a tables.patch body
// into the catalog schema using TablePatchSchemaOptions in strict mode:
// before merging, every patched field that already exists is validated,
// and the call fails when the patch changes a field's type or tightens
// its mode to REQUIRED. On success it returns the merged schema and
// whether anything (fields, modes, or descriptions) changed.
func MergeSchemasForTablePatch(
	existing *bqtypes.TableSchema,
	patch *bqtypes.TableSchema,
) (*bqtypes.TableSchema, bool, error) {
	merged, changed, err := mergeSchemas(existing, patch, TablePatchSchemaOptions, true)
	return merged, changed, err
}
// ApplySchemaUpdate reconciles the destination table's catalog schema
// with querySchema under the given schemaUpdateOptions and returns the
// schema the table has afterwards.
//
// With no options or no query schema the current catalog schema is
// returned as-is. Otherwise the two schemas are merged; when the merge
// changes nothing the current schema is returned. When it does change,
// the table's rows are read out, the table is dropped and re-registered
// with the merged schema, and the rows are inserted back.
//
// The drop / register / re-insert sequence is not rolled back on
// failure: an error after DropTable leaves the table missing or without
// its previous rows.
func ApplySchemaUpdate(ctx context.Context, catalog enginepb.CatalogClient,
	tableRef *enginepb.TableRef, querySchema *bqtypes.TableSchema, opts []string,
) (*enginepb.TableSchema, error) {
	// Both the no-op path and the merge path start from the live schema.
	desc, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
	if err != nil {
		return nil, fmt.Errorf("describe destination table: %w", err)
	}
	current := desc.GetSchema()
	if len(opts) == 0 || querySchema == nil {
		return current, nil
	}

	merged, changed, err := mergeSchemas(schemaFromProto(current), querySchema, opts, false)
	if err != nil {
		return nil, err
	}
	if !changed {
		return current, nil
	}

	// Snapshot the rows before the table is dropped.
	rows, err := listAllRows(ctx, catalog, tableRef, current)
	if err != nil {
		return nil, err
	}

	updated := SchemaToProto(merged)
	if _, err := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: tableRef}); err != nil {
		return nil, fmt.Errorf("schema update drop table: %w", err)
	}
	registerReq := &enginepb.RegisterTableRequest{
		Table:  tableRef,
		Schema: updated,
	}
	if _, err := catalog.RegisterTable(ctx, registerReq); err != nil {
		return nil, fmt.Errorf("schema update register table: %w", err)
	}
	if len(rows) == 0 {
		return updated, nil
	}

	target := seed.TableRef{
		ProjectID: tableRef.GetProjectId(),
		DatasetID: tableRef.GetDatasetId(),
		TableID:   tableRef.GetTableId(),
	}
	if _, err := seed.NewCatalogApplier(catalog).InsertRows(ctx, target, updated, rows); err != nil {
		return nil, fmt.Errorf("schema update re-insert rows: %w", err)
	}
	return updated, nil
}
// mergeSchemas is the shared merge routine behind the exported schema
// helpers. It returns the resulting schema, whether it differs from
// existing, and an error that can only arise from strictPatch
// validation.
//
// A nil existing schema is treated as empty. When load is nil, or opts
// contains neither the field-addition nor the field-relaxation option,
// existing is returned unchanged. Otherwise the merge works on a copy of
// existing, so the caller's schema value is not modified.
func mergeSchemas(
	existing *bqtypes.TableSchema,
	load *bqtypes.TableSchema,
	opts []string,
	strictPatch bool,
) (*bqtypes.TableSchema, bool, error) {
	if existing == nil {
		existing = &bqtypes.TableSchema{}
	}
	canAdd := slices.Contains(opts, schemaUpdateAllowFieldAddition)
	canRelax := slices.Contains(opts, schemaUpdateAllowFieldRelaxation)
	if load == nil || (!canAdd && !canRelax) {
		return existing, false, nil
	}
	if strictPatch {
		if err := validatePatchAgainstExisting(existing, load); err != nil {
			return nil, false, err
		}
	}
	result := cloneBQSchema(existing)
	added := applySchemaFieldAdditions(result, load, canAdd)
	// Always run the second pass: it also refreshes descriptions, even
	// when relaxation itself is not allowed.
	relaxed := applySchemaRelaxationAndDescriptions(result, load, canRelax)
	return result, added || relaxed, nil
}
// validatePatchAgainstExisting checks every field of load that already
// exists (by exact name) in existing and returns the first
// incompatibility found. Fields that are new in load are not inspected.
func validatePatchAgainstExisting(existing *bqtypes.TableSchema, load *bqtypes.TableSchema) error {
	for i := range load.Fields {
		incoming := load.Fields[i]
		at := fieldIndex(existing.Fields, incoming.Name)
		if at >= 0 {
			if err := validatePatchFieldCompatibility(existing.Fields[at], incoming); err != nil {
				return err
			}
		}
	}
	return nil
}
// applySchemaFieldAdditions appends to merged every field of load whose
// name is not already present, and reports whether anything was
// appended. It is a no-op when allowAdd is false. A field added this way
// never stays REQUIRED: that mode is cleared to the empty (nullable)
// mode. load itself is not modified.
func applySchemaFieldAdditions(merged *bqtypes.TableSchema, load *bqtypes.TableSchema, allowAdd bool) bool {
	if !allowAdd {
		return false
	}
	before := len(merged.Fields)
	for _, candidate := range load.Fields {
		if fieldIndex(merged.Fields, candidate.Name) >= 0 {
			continue
		}
		// candidate is the loop's private copy, so this edit stays local.
		if candidate.Mode == fieldModeRequired {
			candidate.Mode = ""
		}
		merged.Fields = append(merged.Fields, candidate)
	}
	return len(merged.Fields) > before
}
// applySchemaRelaxationAndDescriptions walks the fields of merged in
// place and reports whether it changed any of them. Two independent
// updates are applied per field:
//
//   - when allowRelax is set, a REQUIRED field is relaxed to the empty
//     (nullable) mode unless load still lists it as REQUIRED;
//   - regardless of allowRelax, a non-empty description in load replaces
//     a differing description in merged.
func applySchemaRelaxationAndDescriptions(
	merged *bqtypes.TableSchema,
	load *bqtypes.TableSchema,
	allowRelax bool,
) bool {
	changed := false
	for i := range merged.Fields {
		field := &merged.Fields[i]
		if allowRelax && field.Mode == fieldModeRequired && !loadKeepsFieldRequired(load, field.Name) {
			field.Mode = ""
			changed = true
		}
		at := fieldIndex(load.Fields, field.Name)
		if at < 0 {
			continue
		}
		if desc := load.Fields[at].Description; desc != "" && desc != field.Description {
			field.Description = desc
			changed = true
		}
	}
	return changed
}
// validatePatchFieldCompatibility rejects a patched field that changes
// the type of an existing field (modulo the aliases accepted by
// fieldTypesCompatible) or that turns a non-REQUIRED field into a
// REQUIRED one. Modes are compared after normalizeFieldMode.
func validatePatchFieldCompatibility(
	existing, patch bqtypes.TableFieldSchema,
) error {
	if !fieldTypesCompatible(existing.Type, patch.Type) {
		return fmt.Errorf(
			"schema update: cannot change type of field %q from %s to %s",
			existing.Name, existing.Type, patch.Type,
		)
	}
	currentMode := normalizeFieldMode(existing.Mode)
	wantsRequired := normalizeFieldMode(patch.Mode) == fieldModeRequired
	if wantsRequired && currentMode != fieldModeRequired {
		return fmt.Errorf(
			"schema update: cannot change mode of field %q from %s to REQUIRED",
			existing.Name, modeLabel(currentMode),
		)
	}
	return nil
}
// fieldTypesCompatible reports whether two type names denote the same
// column type. Names are compared after trimming surrounding whitespace
// and upper-casing; in addition the REST and engine spellings of the
// integer, float, and boolean types are treated as interchangeable so a
// schema survives a REST <-> engine round-trip.
func fieldTypesCompatible(existingType, patchType string) bool {
	have := strings.ToUpper(strings.TrimSpace(existingType))
	want := strings.ToUpper(strings.TrimSpace(patchType))
	if have == want {
		return true
	}
	// Each pair lists two spellings of one type.
	aliases := [...][2]string{
		{fieldTypeInt64, fieldTypeInteger},
		{fieldTypeFloat64, fieldTypeFloat},
		{fieldTypeBool, fieldTypeBoolean},
	}
	for _, pair := range aliases {
		haveIn := have == pair[0] || have == pair[1]
		wantIn := want == pair[0] || want == pair[1]
		if haveIn && wantIn {
			return true
		}
	}
	return false
}
// normalizeFieldMode maps a field mode to its canonical form: the empty
// string for an unset or NULLABLE mode (any letter case), and the
// upper-cased input for everything else.
func normalizeFieldMode(mode string) string {
	switch {
	case mode == "", strings.EqualFold(mode, "NULLABLE"):
		return ""
	default:
		return strings.ToUpper(mode)
	}
}
// modeLabel renders a normalized field mode for error messages,
// spelling the empty mode as "NULLABLE".
func modeLabel(mode string) string {
	if mode != "" {
		return mode
	}
	return "NULLABLE"
}
// loadKeepsFieldRequired reports whether the incoming schema lists the
// named field with mode REQUIRED. A nil schema or a field the schema
// does not mention yields false: query/load results default to
// NULLABLE, so REQUIRED survives only when the incoming schema still
// asks for it.
func loadKeepsFieldRequired(load *bqtypes.TableSchema, name string) bool {
	if load == nil {
		return false
	}
	if at := fieldIndex(load.Fields, name); at >= 0 {
		return load.Fields[at].Mode == fieldModeRequired
	}
	return false
}
// cloneBQSchema returns a new schema whose Fields slice is an
// independent copy of s.Fields, so appending to or overwriting elements
// of the copy leaves s untouched. The copy is one level deep: each field
// is copied by value. A nil s yields an empty schema.
func cloneBQSchema(s *bqtypes.TableSchema) *bqtypes.TableSchema {
	dup := &bqtypes.TableSchema{}
	if s != nil {
		dup.Fields = append(make([]bqtypes.TableFieldSchema, 0, len(s.Fields)), s.Fields...)
	}
	return dup
}
// fieldIndex returns the position of the first field whose Name equals
// name exactly (case-sensitive), or -1 when there is none.
func fieldIndex(fields []bqtypes.TableFieldSchema, name string) int {
	for i := range fields {
		if fields[i].Name == name {
			return i
		}
	}
	return -1
}
// schemaFromProto converts an engine schema into its REST-side
// representation, carrying over each field's name, type, mode, and
// description. A nil input yields nil.
//
// NOTE(review): only those four attributes are copied. If either schema
// type can hold more (for example nested sub-fields), that information
// does not survive this conversion; confirm that is acceptable for the
// drop-and-recreate schema update path.
func schemaFromProto(s *enginepb.TableSchema) *bqtypes.TableSchema {
	if s == nil {
		return nil
	}
	fields := make([]bqtypes.TableFieldSchema, len(s.Fields))
	for i, src := range s.Fields {
		fields[i] = bqtypes.TableFieldSchema{
			Name:        src.GetName(),
			Type:        src.GetType(),
			Mode:        src.GetMode(),
			Description: src.GetDescription(),
		}
	}
	return &bqtypes.TableSchema{Fields: fields}
}
// listAllRows reads every row of tableRef through catalog.ListRows,
// 10000 rows per request, and returns them as column-name -> value maps
// keyed by the fields of schema. Paging stops at the first empty page or
// once the running offset reaches the total the server reports.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient,
	tableRef *enginepb.TableRef, schema *enginepb.TableSchema,
) ([]map[string]any, error) {
	const pageSize = 10000
	var collected []map[string]any
	for offset := int64(0); ; {
		resp, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
			Table:      tableRef,
			StartIndex: offset,
			MaxResults: pageSize,
		})
		if err != nil {
			return nil, fmt.Errorf("list rows for schema update: %w", err)
		}
		batch := resp.GetRows()
		for _, row := range batch {
			collected = append(collected, protoRowToMap(schema, row))
		}
		offset += int64(len(batch))
		if len(batch) == 0 || offset >= resp.GetTotalRows() {
			return collected, nil
		}
	}
}
// protoRowToMap pairs the cells of row with the fields of schema by
// position and returns them keyed by field name. A field with no
// corresponding cell (row shorter than the schema) maps to nil; cells
// beyond the last field are ignored.
func protoRowToMap(schema *enginepb.TableSchema, row *enginepb.DataRow) map[string]any {
	columns := schema.GetFields()
	cells := row.GetCells()
	record := make(map[string]any, len(columns))
	for i, col := range columns {
		var value any
		if i < len(cells) {
			value = protoCellToAny(cells[i])
		}
		record[col.GetName()] = value
	}
	return record
}
// protoCellToAny unwraps a cell into nil or its string value. It
// returns nil for a nil cell, a cell flagged as null, or a cell with
// neither a non-empty string nor any value set; otherwise it returns the
// cell's string value (which may be the empty string when a value is
// set).
func protoCellToAny(c *enginepb.Cell) any {
	if c == nil || c.GetNullValue() {
		return nil
	}
	text := c.GetStringValue()
	if text == "" && c.GetValue() == nil {
		return nil
	}
	return text
}
package load
import (
"bytes"
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"io"
"mime"
"mime/multipart"
"net/http"
"strconv"
"strings"
"sync"
"time"
)
// UploadSession tracks a resumable jobs.insert media upload: the
// metadata captured when the session was opened plus the media bytes
// received so far.
type UploadSession struct {
// ProjectID is the project the session was created for.
ProjectID string
// Metadata is the store's private copy of the metadata bytes passed
// to CreateSession.
Metadata []byte
// Data holds the media bytes stored so far; AppendBytes grows it as
// chunks arrive.
Data []byte
// Total is the total size passed to CreateSession.
// NOTE(review): how an unknown total is encoded is not visible here;
// confirm with the callers.
Total int64
// Expires is the instant after which Get and AppendBytes treat the
// session as gone. CreateSession sets it 24 hours ahead, in UTC.
Expires time.Time
}
// UploadStore holds in-process resumable upload sessions, keyed by
// upload id. Use NewUploadStore to create one: the zero value has a nil
// session map.
type UploadStore struct {
// mu guards sessions; the store's methods hold it while touching the map.
mu sync.Mutex
// sessions maps an upload id (as returned by CreateSession) to its session.
sessions map[string]*UploadSession
}
// NewUploadStore returns an empty, ready-to-use upload session table.
func NewUploadStore() *UploadStore {
	store := new(UploadStore)
	store.sessions = make(map[string]*UploadSession)
	return store
}
// defaultUploadStore is the package-wide store, created once at package
// initialization.
var defaultUploadStore = NewUploadStore()
// DefaultUploadStore is the process-local resumable upload session table.
// Every call returns the same store instance.
func DefaultUploadStore() *UploadStore { return defaultUploadStore }
// CreateSession opens a new resumable upload session and returns its
// freshly generated id. The metadata bytes are copied, so the caller may
// reuse its slice afterwards. The session expires 24 hours after
// creation.
func (s *UploadStore) CreateSession(projectID string, metadata []byte, total int64) string {
	id := newUploadID()
	session := &UploadSession{
		ProjectID: projectID,
		Metadata:  append([]byte(nil), metadata...),
		Total:     total,
		Expires:   time.Now().UTC().Add(24 * time.Hour),
	}

	s.mu.Lock()
	s.sessions[id] = session
	s.mu.Unlock()
	return id
}
// Get looks up the session for id. It returns nil when the id is
// unknown or the session has expired; an expired session is removed
// from the store as a side effect. The returned pointer is the stored
// session itself, not a copy.
func (s *UploadStore) Get(id string) *UploadSession {
	s.mu.Lock()
	defer s.mu.Unlock()

	if sess, ok := s.sessions[id]; ok && !time.Now().UTC().After(sess.Expires) {
		return sess
	}
	// Unknown (delete is a no-op) or expired (evict).
	delete(s.sessions, id)
	return nil
}
// Delete drops the session for id, whether it completed or was
// abandoned. Deleting an unknown id is a no-op.
func (s *UploadStore) Delete(id string) {
	s.mu.Lock()
	delete(s.sessions, id)
	s.mu.Unlock()
}
// AppendBytes stores chunk at byte offset start of the session's
// buffer. The buffer grows (zero-filled) when the chunk extends past its
// current end, and bytes already present in the target range are
// overwritten.
//
// It returns an error when the session is unknown or expired (an
// expired session is also removed from the store), or when start is
// negative.
func (s *UploadStore) AppendBytes(id string, chunk []byte, start int64) error {
	s.mu.Lock()
	defer s.mu.Unlock()
	sess, ok := s.sessions[id]
	if !ok || time.Now().UTC().After(sess.Expires) {
		delete(s.sessions, id)
		return errors.New("upload session not found")
	}
	// A negative offset (for example from a malformed Content-Range)
	// would make the slice expression below panic, so reject it here.
	if start < 0 {
		return fmt.Errorf("upload chunk offset %d is negative", start)
	}
	need := int(start) + len(chunk)
	if need > len(sess.Data) {
		grown := make([]byte, need)
		copy(grown, sess.Data)
		sess.Data = grown
	}
	copy(sess.Data[start:], chunk)
	return nil
}
// ReceivedBytes reports the current length of the session's data
// buffer, or 0 when the id is unknown. Expiry is not consulted here.
func (s *UploadStore) ReceivedBytes(id string) int64 {
	s.mu.Lock()
	defer s.mu.Unlock()

	var received int64
	if sess, ok := s.sessions[id]; ok {
		received = int64(len(sess.Data))
	}
	return received
}
// ParseMultipartJob splits a multipart jobs.insert upload body into its
// first part (the metadata JSON) and its second part (the media bytes).
// The boundary is taken from contentType, which must be a multipart/*
// media type. Parts after the second are read but discarded. media is
// nil when the body has only one part.
//
// An error is returned when contentType cannot be parsed, is not
// multipart, or lacks a boundary; when a part cannot be read; or when
// the metadata part is missing or empty.
func ParseMultipartJob(body []byte, contentType string) (metadata, media []byte, err error) {
	mediaType, params, err := mime.ParseMediaType(contentType)
	if err != nil {
		return nil, nil, fmt.Errorf("parse Content-Type: %w", err)
	}
	if !strings.HasPrefix(mediaType, "multipart/") {
		return nil, nil, fmt.Errorf("expected multipart Content-Type, got %q", mediaType)
	}
	boundary := params["boundary"]
	if boundary == "" {
		return nil, nil, errors.New("multipart boundary missing")
	}

	// slots[0] receives the metadata part, slots[1] the media part.
	var slots [2][]byte
	mr := multipart.NewReader(bytes.NewReader(body), boundary)
	for n := 0; ; n++ {
		part, nextErr := mr.NextPart()
		if nextErr != nil {
			if nextErr == io.EOF {
				break
			}
			return nil, nil, fmt.Errorf("read multipart part: %w", nextErr)
		}
		payload, readErr := io.ReadAll(part)
		if readErr != nil {
			return nil, nil, fmt.Errorf("read multipart body: %w", readErr)
		}
		if n < len(slots) {
			slots[n] = payload
		}
	}
	if len(slots[0]) == 0 {
		return nil, nil, errors.New("multipart upload missing metadata part")
	}
	return slots[0], slots[1], nil
}
// ParseContentRange parses a Content-Range header of the form
// "bytes start-end/total". Either side may be "*": an unknown total is
// reported as -1, and an unspecified range ("bytes */total") is
// reported as start=0, end=0. ok is false, with all numbers zero, when
// the header does not have this shape.
func ParseContentRange(header string) (start, end, total int64, ok bool) {
	spec, hasUnit := strings.CutPrefix(strings.TrimSpace(header), "bytes ")
	if !hasUnit || strings.Count(spec, "/") != 1 {
		return 0, 0, 0, false
	}
	rangeSpec, totalSpec, _ := strings.Cut(spec, "/")

	// Resolve the part after the slash first; "*" means unknown.
	size := int64(-1)
	if totalSpec != "*" {
		parsed, err := strconv.ParseInt(totalSpec, 10, 64)
		if err != nil {
			return 0, 0, 0, false
		}
		size = parsed
	}
	if rangeSpec == "*" {
		return 0, 0, size, true
	}

	// Exactly one dash: this also rejects signed bounds such as "1--2".
	if strings.Count(rangeSpec, "-") != 1 {
		return 0, 0, 0, false
	}
	firstText, lastText, _ := strings.Cut(rangeSpec, "-")
	first, err := strconv.ParseInt(firstText, 10, 64)
	if err != nil {
		return 0, 0, 0, false
	}
	last, err := strconv.ParseInt(lastText, 10, 64)
	if err != nil {
		return 0, 0, 0, false
	}
	return first, last, size, true
}
// WriteResumeIncomplete answers a partial resumable upload with HTTP
// 308 (Resume Incomplete per api-uploads.md). When at least one byte
// has been received, the Range header reports the stored span as
// "0-<last byte index>"; with nothing received the header is omitted.
//
// NOTE(review): some Google upload documentation shows the header as
// "Range: bytes=0-N"; confirm the clients in use accept the bare "0-N"
// form emitted here.
func WriteResumeIncomplete(w http.ResponseWriter, received int64) {
	if received > 0 {
		lastByte := received - 1
		w.Header().Set("Range", fmt.Sprintf("0-%d", lastByte))
	}
	// 308 is http.StatusPermanentRedirect in net/http; the upload
	// protocol reuses the code to mean "resume incomplete".
	w.WriteHeader(http.StatusPermanentRedirect)
}
// newUploadID returns a session id made of 16 bytes from crypto/rand,
// rendered as 32 lowercase hex characters. A failure of the random
// source is deliberately ignored.
func newUploadID() string {
	raw := make([]byte, 16)
	_, _ = rand.Read(raw)
	return hex.EncodeToString(raw)
}
// SessionLocation returns the relative URI (path plus query) of a
// resumable upload session. Both ids are inserted verbatim, without URL
// escaping.
func SessionLocation(projectID, uploadID string) string {
	const sessionURIFormat = "/upload/bigquery/v2/projects/%s/jobs?uploadType=resumable&upload_id=%s"
	return fmt.Sprintf(sessionURIFormat, projectID, uploadID)
}
// AbsoluteSessionLocation returns the Location header value for a
// resumable session: baseURL (trimmed of surrounding whitespace and
// trailing slashes) followed by the relative session URI. With an empty
// baseURL only the relative URI is returned.
//
// Python/Node resumable upload clients pass the Location URL directly to
// requests/teeny-request and require a scheme (relative paths →
// MissingSchema), hence the absolute form.
func AbsoluteSessionLocation(baseURL, projectID, uploadID string) string {
	relative := SessionLocation(projectID, uploadID)
	if origin := strings.TrimRight(strings.TrimSpace(baseURL), "/"); origin != "" {
		return origin + relative
	}
	return relative
}
// Package middleware contains HTTP middleware for the BigQuery emulator
// gateway. The middleware here is intentionally permissive: the emulator
// follows cloud-spanner-emulator's posture and parses but does not
// validate authentication credentials.
//
// See docs/REST_API.md ("Authentication posture") and the
// gateway-HTTP-surface section of ROADMAP.md for the rationale:
// clients reuse their real BigQuery code paths by
// pointing at the emulator, and that code unconditionally sends a bearer
// token. Rejecting those tokens would force every client to special-case
// the emulator, which we explicitly want to avoid.
package middleware
import (
"context"
"net/http"
"strings"
)
// principalCtxKey is the unexported context-key type used to stash the
// synthetic [Principal] on each request. Following the standard library
// guidance, we use a private named type so callers cannot collide with
// our key by accident. The type is an empty struct: it carries no data
// and serves only as a unique key identity.
type principalCtxKey struct{}
// Principal is the synthetic identity the emulator attributes to every
// request. The fields are populated by [WithAuth]; consumers retrieve
// the value via [PrincipalFromContext].
//
// The emulator is deliberately credulous: it does not validate the
// bearer token, look up an account, or check IAM. Handlers that need
// to differentiate authenticated vs anonymous traffic should consult
// [Principal.Anonymous].
type Principal struct {
// Email is the synthetic account email attributed to the caller.
// It is the same value for every request and exists only so logs
// and (eventually) audit trails have a stable subject string.
Email string
// Bearer is the credential the client presented in the Authorization
// header. For a "Bearer <token>" header (scheme matched
// case-insensitively) it is the token with the scheme prefix
// stripped; for any other non-empty header it is the whole trimmed
// header value, stored verbatim. It is empty when no Authorization
// header was sent.
Bearer string
// Anonymous is true when the request did not present an
// Authorization header at all. A request with a malformed header is
// still considered non-anonymous: the emulator only cares whether
// the client tried, not whether the credential is well-formed.
Anonymous bool
}
// defaultPrincipalEmail is the synthetic email used for every request;
// [WithAuth] assigns it to [Principal.Email] unconditionally.
// It mirrors cloud-spanner-emulator, which similarly attributes all
// traffic to a fixed local identity.
const defaultPrincipalEmail = "emulator@bigquery.local"
// WithAuth returns middleware that parses the Authorization header (if
// present) and attaches a [Principal] to the request context. It never
// short-circuits the response: every request is allowed through with a
// synthetic identity, matching cloud-spanner-emulator's posture and the
// emulator's documented behavior in docs/REST_API.md.
//
// The middleware accepts any non-empty Authorization header. RFC 6750
// "Bearer" tokens have the "Bearer " prefix stripped; other schemes are
// stored verbatim in [Principal.Bearer]. The header is never logged.
func WithAuth(next http.Handler) http.Handler {
	attach := func(w http.ResponseWriter, r *http.Request) {
		header := strings.TrimSpace(r.Header.Get("Authorization"))
		principal := Principal{
			Email:     defaultPrincipalEmail,
			Anonymous: header == "",
		}
		if !principal.Anonymous {
			// Default to the verbatim header; narrow it to the bare
			// token only for a "Bearer <token>" credential.
			principal.Bearer = header
			scheme, token, found := strings.Cut(header, " ")
			if found && strings.EqualFold(scheme, "Bearer") {
				principal.Bearer = strings.TrimSpace(token)
			}
		}
		ctx := context.WithValue(r.Context(), principalCtxKey{}, principal)
		next.ServeHTTP(w, r.WithContext(ctx))
	}
	return http.HandlerFunc(attach)
}
// PrincipalFromContext returns the [Principal] that [WithAuth] stored
// on ctx. The boolean is false, and the Principal is the zero value,
// when ctx carries no principal; that should only happen for requests
// that bypass the middleware (such as direct calls in tests).
func PrincipalFromContext(ctx context.Context) (Principal, bool) {
	if principal, ok := ctx.Value(principalCtxKey{}).(Principal); ok {
		return principal, true
	}
	return Principal{}, false
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package middleware
import (
"encoding/json"
"net/http"
)
// jsonContentType is the Content-Type header value middleware emits on
// JSON error envelopes. Pinned here so a future tweak (utf-16, an
// http/2 specifier, etc.) lands in exactly one place.
const jsonContentType = "application/json; charset=utf-8"
// errReasonInvalid is the BigQuery-shaped error `status` / `errors[].reason`
// value used for 400 responses the gateway-layer middlewares emit
// (gunzip body invalid, method override misuse). The handlers package
// uses the same literal in `handlers.writeError` calls; we keep a
// local copy here because middleware -> handlers is a forbidden import
// direction (handlers depends on middleware for auth context).
const errReasonInvalid = "invalid"
// fieldKeyMessage is the JSON field key for the human-readable message
// in the BigQuery error envelope. [writeJSONError] uses it twice: for
// the top-level `error.message` and for `error.errors[].message`. Kept
// as a named const so the recurring literal stays goconst-clean.
const fieldKeyMessage = "message"
// writeJSONError emits a BigQuery-shaped JSON error envelope at
// `status` with `reason` and `msg`. Mirrors `handlers.writeError`
// byte-for-byte but keeps middleware free of a handlers import (which
// would close an import cycle — handlers depends on middleware for
// the auth-context lookup).
func writeJSONError(w http.ResponseWriter, status int, reason, msg string) {
	// Single entry of the `errors` array.
	detail := map[string]any{
		"reason":        reason,
		fieldKeyMessage: msg,
		"domain":        "global",
	}
	envelope := map[string]any{
		"error": map[string]any{
			"code":          status,
			fieldKeyMessage: msg,
			"status":        reason,
			"errors":        []map[string]any{detail},
		},
	}

	// Headers must be set before WriteHeader commits the response.
	w.Header().Set("Content-Type", jsonContentType)
	w.WriteHeader(status)
	// Best effort: the status line is already sent, so an encode
	// failure cannot be reported to the client.
	_ = json.NewEncoder(w).Encode(envelope)
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package middleware
import (
"compress/gzip"
"io"
"net/http"
"strings"
)
// WithGunzipRequestBody wraps next so that when the client sends
// `Content-Encoding: gzip` the request body is transparently
// decompressed before downstream handlers read it. The Java BigQuery
// client gzips JSON POST bodies by default; without this middleware
// every dataset/table create against the emulator REST gateway returns
// `invalid character '\x1f'` because handlers see the raw gzip framing.
//
// The middleware keeps the emulator and full-engine paths aligned on shape: missing/empty
// Content-Encoding short-circuits to next without allocation, an
// invalid gzip stream returns a BigQuery-shaped 400, and on success
// the Content-Encoding header is dropped so handlers don't double-
// decode if they happen to be aware of the encoding.
func WithGunzipRequestBody(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Nothing to decode: no body at all, or a body that is not
		// declared as gzip.
		passthrough := r == nil || r.Body == nil || r.Body == http.NoBody
		if !passthrough {
			encoding := strings.ToLower(strings.TrimSpace(r.Header.Get("Content-Encoding")))
			passthrough = !strings.Contains(encoding, "gzip")
		}
		if passthrough {
			next.ServeHTTP(w, r)
			return
		}

		inflater, err := gzip.NewReader(r.Body)
		if err != nil {
			writeGunzipError(w, "invalid gzip request body: "+err.Error())
			return
		}
		// gzip.Reader.Close does NOT close the reader it wraps, so the
		// replacement body keeps the original around and closes both;
		// the http server's body cleanup then still reaches it.
		original := r.Body
		r.Body = &gzipRequestBody{gzip: inflater, underlying: original}

		// Downstream code must see a plain body: remove the encoding so
		// nobody decodes twice, and forget the compressed length since
		// the inflated stream has a different, unknown size.
		r.Header.Del("Content-Encoding")
		r.Header.Del("Content-Length")
		r.ContentLength = -1
		next.ServeHTTP(w, r)
	})
}
// gzipRequestBody is the request body installed by
// [WithGunzipRequestBody]: reads come from the gzip stream, while Close
// also releases the original body the stream was layered on.
type gzipRequestBody struct {
	gzip       *gzip.Reader
	underlying io.ReadCloser
}

// Compile-time check that the wrapper can stand in for a request body.
var _ io.ReadCloser = (*gzipRequestBody)(nil)

// Read returns decompressed bytes from the gzip stream.
func (g *gzipRequestBody) Read(dst []byte) (int, error) {
	n, err := g.gzip.Read(dst)
	return n, err
}

// Close closes the gzip reader and then the original body. Only the
// original body's close error is reported; the gzip reader's result is
// deliberately discarded.
func (g *gzipRequestBody) Close() error {
	_ = g.gzip.Close()
	err := g.underlying.Close()
	return err
}
// writeGunzipError emits a BigQuery-shaped 400 envelope carrying msg,
// always with status http.StatusBadRequest and reason
// [errReasonInvalid]. Delegates to
// the shared [writeJSONError] helper so the envelope shape stays in
// sync with the method-override middleware's 400 path and so the
// "invalid" / "message" string literals are referenced from exactly
// one place (goconst-clean).
func writeGunzipError(w http.ResponseWriter, msg string) {
writeJSONError(w, http.StatusBadRequest, errReasonInvalid, msg)
}
package middleware
import (
"context"
"net"
"net/http"
)
// loopbackCtxKey is the unexported context-key type used to stash the
// per-request loopback flag [WithLoopbackTag] computes. Following the
// standard library guidance, we use a private named type so callers
// cannot collide with our key by accident. The value stored under this
// key is a bool, read back by [IsLoopback].
type loopbackCtxKey struct{}
// WithLoopbackTag returns middleware that records whether the request
// arrived from a loopback caller (an HTTP client bound to `127.0.0.0/8`
// or `::1`, or a unix-socket connection where `RemoteAddr` is empty).
// The flag is stashed in the request context so handlers can decide
// whether to surface emulator-internal debug fields back to the caller
// without having to re-parse `r.RemoteAddr`.
//
// The single user today is the synchronous query handler, which uses
// the flag to gate `Job.statistics.query.emulatorRoute` (the canonical
// `Disposition` string the C++ coordinator's `RouteClassifier`
// produced for the query). The contract is that field is observable
// ONLY to loopback callers; non-loopback callers receive a response
// with the field omitted entirely, matching the public BigQuery REST
// surface byte-for-byte. See
// `docs/ENGINE_POLICY.md` for the wider
// rationale.
//
// The middleware never short-circuits the response: it only attaches
// a boolean to the context. Handlers that want loopback-only behavior
// call [IsLoopback]; handlers that don't care are unaffected.
func WithLoopbackTag(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx := context.WithValue(r.Context(), loopbackCtxKey{},
isLoopbackRemoteAddr(r.RemoteAddr))
next.ServeHTTP(w, r.WithContext(ctx))
})
}
// IsLoopback reports whether the request that owns this context
// originated from a loopback caller. It returns false when the
// context has no loopback flag (i.e. the request bypassed
// [WithLoopbackTag], which should only happen in direct unit tests
// that don't go through the middleware stack). Callers that need the
// inverse default for tests can adjust at the call site -- the
// middleware-bypass case is rare enough that "treat as non-loopback"
// is the safer default for production code.
func IsLoopback(ctx context.Context) bool {
	// A missing flag (middleware bypassed) counts as non-loopback.
	flag, present := ctx.Value(loopbackCtxKey{}).(bool)
	return present && flag
}
// isLoopbackRemoteAddr returns true when `addr` is the standard Go
// `host:port` shape and the host resolves to a loopback IP. Unix-
// socket connections present an empty `RemoteAddr` on most servers
// (httptest's `httptest.Server` always binds TCP, but a unix-listener
// backed gateway leaves the field empty); we treat the empty string
// as loopback because that is the only realistic deployment shape
// for a unix-socket emulator.
//
// Malformed addresses are treated as non-loopback so a bug in an
// upstream proxy that strips the port can't accidentally elevate a
// public caller to loopback status.
func isLoopbackRemoteAddr(addr string) bool {
	// Empty RemoteAddr: treated as a unix-socket caller, hence loopback.
	if addr == "" {
		return true
	}
	// `RemoteAddr` should be `host:port` per the net/http
	// documentation. Anything else is malformed and is classified as
	// non-loopback, so a misrouted caller can never observe
	// loopback-only fields.
	host, _, err := net.SplitHostPort(addr)
	if err != nil {
		return false
	}
	// Only literal IPs qualify; host names are not resolved.
	ip := net.ParseIP(host)
	return ip != nil && ip.IsLoopback()
}
// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package middleware
import (
"net/http"
"strings"
)
// methodOverrideHeader is the canonical header name the
// google-api-client `MethodOverride` interceptor sets when its
// underlying transport reports that a method (typically PATCH) is
// unsupported. See `com.google.api.client.googleapis.MethodOverride`
// in google-api-client-2.x and `NetHttpTransport.SUPPORTED_METHODS`
// in google-http-client (PATCH is intentionally omitted from that
// list, so PATCH-shaped REST calls travel as POST + this header).
// [WithMethodOverride] reads it from each incoming request.
const methodOverrideHeader = "X-HTTP-Method-Override"
// WithMethodOverride returns middleware that honors the
// `X-HTTP-Method-Override` request header so emulator clients written
// against transports that do not support PATCH (notably the Java
// google-api-client + java.net.HttpURLConnection combo) can drive
// PATCH/PUT/DELETE handlers via a tunneled POST.
//
// Why we need this: the Java BigQuery client's default
// `NetHttpTransport` advertises support for GET, HEAD, OPTIONS, POST,
// PUT, DELETE, TRACE — but not PATCH. The
// `com.google.api.client.googleapis.MethodOverride` interceptor (a
// default `HttpExecuteInterceptor` on every google-api-client request)
// rewrites unsupported methods to POST and sets
// `X-HTTP-Method-Override: <originalMethod>`. The gateway's mux
// otherwise mounts dataset/table/job updates at PATCH and PUT, so
// without this middleware Java callers like AuthorizeDatasetIT land on
// `DatasetCustomMethodPOST` (the `/datasets/{id}:undelete` dispatcher)
// and get a 405 against a request that was logically a PATCH.
// Mounting the override at the middleware layer fixes the entire
// gateway surface with one rewrite point instead of teaching every
// `*CustomMethodPOST` handler to also accept ACL bodies.
//
// Behavior:
// - Header absent → pass through unchanged. The middleware never
// allocates anything for the common case.
// - Header set + request method is POST + override is one of
// PATCH/PUT/DELETE (case-insensitive) → rewrite `r.Method` to the
// uppercase override and continue. Mux dispatch then routes the
// request to the genuine PATCH/PUT/DELETE handler.
// - Header set + request method is not POST → 400. The
// google-api-client interceptor only ever sets the header on a
// POST it just rewrote, so a non-POST + override is either a
// misconfigured client or an attempt to confuse the dispatcher;
// either way, refusing is the safe answer.
// - Header set + override value is anything other than
// PATCH/PUT/DELETE → 400. We don't honor `GET`/`HEAD`/`OPTIONS`
// because they are not what the Java client tunnels and we want
// a tight surface.
//
// Mounting: place the middleware after the access-log middleware so
// the access log records the original POST + override-header pair,
// but before any middleware or handler that routes on `r.Method`.
// In `gateway/server.go::wrapMiddleware` it sits between the
// loopback-tag middleware and the request-log layer; see that
// function for the canonical chain order.
func WithMethodOverride(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		override := r.Header.Get(methodOverrideHeader)
		switch {
		case override == "":
			// No override requested: fall through to next untouched.
		case r.Method != http.MethodPost:
			// The header is only meaningful on a tunneled POST.
			writeMethodOverrideError(w,
				"X-HTTP-Method-Override is only valid on POST requests; "+
					"received "+r.Method)
			return
		default:
			method := strings.ToUpper(strings.TrimSpace(override))
			allowed := method == http.MethodPatch ||
				method == http.MethodPut ||
				method == http.MethodDelete
			if !allowed {
				// Echo the header exactly as received.
				writeMethodOverrideError(w,
					"X-HTTP-Method-Override must be one of PATCH, PUT, "+
						"or DELETE; received "+override)
				return
			}
			// Rewrite in place so routing sees the real method.
			r.Method = method
		}
		next.ServeHTTP(w, r)
	})
}
// writeMethodOverrideError emits a BigQuery-shaped 400 envelope
// carrying msg, always with status http.StatusBadRequest and reason
// [errReasonInvalid].
// Delegates to the shared [writeJSONError] helper so the envelope
// shape stays in lockstep with [writeGunzipError]; consolidating the
// JSON layout in one place also keeps the goconst linter from
// flagging recurring `"invalid"` / `"message"` literals once a third
// middleware needs to emit a 400.
func writeMethodOverrideError(w http.ResponseWriter, msg string) {
writeJSONError(w, http.StatusBadRequest, errReasonInvalid, msg)
}
package models
import (
"regexp"
"strconv"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// modelTypeRE captures the single-quoted value of a `model_type = '...'`
// option. The key is matched case-insensitively and whitespace around
// the `=` is allowed; the value is whatever sits between the quotes.
var modelTypeRE = regexp.MustCompile(`(?i)model_type\s*=\s*'([^']+)'`)
// defaultModelType is the model type recorded when the DDL contains no
// model_type option that modelTypeRE matches.
const defaultModelType = "LINEAR_REG"
// RegisterFromDDL parses a CREATE MODEL statement and upserts the
// resulting model metadata into store. It returns a reference to the
// registered model, or nil when sql is not a CREATE MODEL statement the
// parser recognizes (in which case store is left untouched). The
// returned reference is a copy; mutating it does not affect the store.
func RegisterFromDDL(store *Store, projectID, defaultDatasetID, sql string) *bqtypes.ModelReference {
	model, recognized := parseCreateModelDDL(projectID, defaultDatasetID, sql)
	if !recognized {
		return nil
	}
	store.Upsert(model)
	reference := model.ModelReference
	return &reference
}
// parseCreateModelDDL builds a Model snapshot from a CREATE MODEL
// statement. The second result is false when sql does not start with a
// recognised CREATE MODEL header or the model name cannot be read.
func parseCreateModelDDL(projectID, defaultDatasetID, sql string) (bqtypes.Model, bool) {
	statement := strings.TrimSpace(sql)
	folded := strings.ToUpper(statement)

	// More specific headers come first so the plain "CREATE MODEL"
	// prefix does not shadow "CREATE MODEL IF NOT EXISTS".
	headers := [...]string{
		"CREATE OR REPLACE MODEL",
		"CREATE MODEL IF NOT EXISTS",
		"CREATE MODEL",
	}
	recognised := false
	for _, header := range headers {
		if strings.HasPrefix(folded, header) {
			statement = strings.TrimSpace(statement[len(header):])
			recognised = true
			break
		}
	}
	if !recognised {
		return bqtypes.Model{}, false
	}

	qualified, _, named := parseQuotedOrBareName(statement)
	if !named {
		return bqtypes.Model{}, false
	}
	pID, dID, mID := splitModelName(projectID, defaultDatasetID, qualified)

	// The model type is searched for in the whole statement, not just
	// the text after the name.
	kind := defaultModelType
	if groups := modelTypeRE.FindStringSubmatch(sql); len(groups) == 2 {
		kind = strings.ToUpper(strings.TrimSpace(groups[1]))
	}

	stamp := nowMillis()
	model := bqtypes.Model{
		ModelReference: bqtypes.ModelReference{
			ProjectID: pID,
			DatasetID: dID,
			ModelID:   mID,
		},
		ModelType:        kind,
		CreationTime:     stamp,
		LastModifiedTime: stamp,
		Etag:             MintEtag(),
	}
	return model, true
}
// splitModelName resolves a dotted model path into project, dataset and
// model IDs.
//
//   - "model"                 -> (projectID, defaultDatasetID, "model")
//   - "dataset.model"         -> (projectID, "dataset", "model")
//   - "project.dataset.model" -> ("project", "dataset", "model")
//
// Backticks are stripped from every segment, so a partially quoted path
// such as ds.`model` does not leak quote characters into the IDs. When
// the path has more than three segments the last two are taken as the
// dataset and model, and the remaining leading segments are re-joined
// with "." to form the project ID.
func splitModelName(projectID, defaultDatasetID, name string) (pID, dID, mID string) {
	parts := strings.Split(name, ".")
	for i := range parts {
		parts[i] = strings.Trim(parts[i], "`")
	}
	switch n := len(parts); {
	case n >= 3:
		return strings.Join(parts[:n-2], "."), parts[n-2], parts[n-1]
	case n == 2:
		return projectID, parts[0], parts[1]
	default:
		return projectID, defaultDatasetID, parts[0]
	}
}
// parseQuotedOrBareName reads the leading identifier from rest.
//
// A backtick-quoted identifier is returned without its quotes; a bare
// identifier ends at the first space, tab, newline, carriage return or
// opening parenthesis. tail is the remaining text with surrounding
// whitespace removed. ok is false for empty input or an unterminated
// backtick quote.
func parseQuotedOrBareName(rest string) (name, tail string, ok bool) {
	trimmed := strings.TrimSpace(rest)
	switch {
	case trimmed == "":
		return "", "", false
	case trimmed[0] == '`':
		quoted, after, closed := strings.Cut(trimmed[1:], "`")
		if !closed {
			return "", "", false
		}
		return quoted, strings.TrimSpace(after), true
	}

	cut := strings.IndexAny(trimmed, " \t\n\r(")
	if cut == -1 {
		return trimmed, "", true
	}
	return trimmed[:cut], strings.TrimSpace(trimmed[cut:]), true
}
// nowMillis returns the current UTC time as a base-10 string of
// milliseconds since the Unix epoch.
func nowMillis() string {
	millis := time.Now().UTC().UnixMilli()
	return strconv.FormatInt(millis, 10)
}
// Package models is the gateway-side in-memory registry of BigQuery ML
// model metadata registered by CREATE MODEL DDL stubs. REST handlers
// and query jobs register models here so client libraries can round-trip
// list/get/delete without a trained-model store.
package models
import (
"crypto/rand"
"encoding/hex"
"maps"
"slices"
"strings"
"sync"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// Store holds model metadata keyed by projectId:datasetId.modelId.
// Its methods take mu around every access to models and treat a nil
// *Store as an empty registry.
type Store struct {
// mu guards models.
mu sync.RWMutex
// models maps a modelKey to the stored model snapshot.
models map[string]bqtypes.Model
}
// NewStore returns an empty model registry whose map is ready for writes.
func NewStore() *Store {
	registry := new(Store)
	registry.models = make(map[string]bqtypes.Model)
	return registry
}
// modelKey builds the registry key "project:dataset.model".
func modelKey(projectID, datasetID, modelID string) string {
	datasetScope := projectID + ":" + datasetID
	return datasetScope + "." + modelID
}
// Upsert registers m, replacing any model already stored under the same
// project, dataset and model ID. The store keeps its own copy of m.
// Calling Upsert on a nil Store is a no-op.
func (s *Store) Upsert(m bqtypes.Model) {
	if s == nil {
		return
	}
	key := modelKey(m.ModelReference.ProjectID, m.ModelReference.DatasetID, m.ModelReference.ModelID)
	snapshot := cloneModel(m)

	s.mu.Lock()
	defer s.mu.Unlock()
	s.models[key] = snapshot
}
// Get looks up a model by its identifiers and returns a copy of the
// stored snapshot together with a flag reporting whether it was found.
// A nil Store never finds anything.
func (s *Store) Get(projectID, datasetID, modelID string) (bqtypes.Model, bool) {
	if s == nil {
		return bqtypes.Model{}, false
	}
	key := modelKey(projectID, datasetID, modelID)

	s.mu.RLock()
	defer s.mu.RUnlock()
	stored, found := s.models[key]
	return cloneModel(stored), found
}
// Delete removes the model with the given identifiers. It reports
// whether a model was actually removed; a nil Store or an unknown model
// yields false.
func (s *Store) Delete(projectID, datasetID, modelID string) bool {
	if s == nil {
		return false
	}
	key := modelKey(projectID, datasetID, modelID)

	s.mu.Lock()
	defer s.mu.Unlock()
	_, present := s.models[key]
	if present {
		delete(s.models, key)
	}
	return present
}
// List returns copies of every model registered in the given dataset,
// sorted by model ID. filter is a BigQuery list filter string; only the
// `model_id=<id>` form is understood, and any other filter is ignored.
// A nil Store yields nil; otherwise the result is a non-nil slice even
// when no model matches.
func (s *Store) List(projectID, datasetID, filter string) []bqtypes.Model {
	if s == nil {
		return nil
	}
	wantID := parseModelIDFilter(filter)
	scope := projectID + ":" + datasetID + "."
	matches := []bqtypes.Model{}

	s.mu.RLock()
	defer s.mu.RUnlock()
	for key, candidate := range s.models {
		inDataset := strings.HasPrefix(key, scope)
		idMatches := wantID == "" || candidate.ModelReference.ModelID == wantID
		if inDataset && idMatches {
			matches = append(matches, cloneModel(candidate))
		}
	}

	// Map iteration order is random; sort for a stable listing.
	slices.SortFunc(matches, func(left, right bqtypes.Model) int {
		return strings.Compare(left.ModelReference.ModelID, right.ModelReference.ModelID)
	})
	return matches
}
// parseModelIDFilter extracts the ID from a `model_id=<id>` filter.
// Surrounding whitespace is ignored; any other filter (including the
// empty string) yields "".
func parseModelIDFilter(filter string) string {
	id, matched := strings.CutPrefix(strings.TrimSpace(filter), "model_id=")
	if !matched {
		return ""
	}
	return strings.TrimSpace(id)
}
// MintEtag returns a random etag for optimistic concurrency: 8 random
// bytes rendered as 16 lowercase hex characters.
func MintEtag() string {
	raw := make([]byte, 8)
	// The error from crypto/rand is deliberately discarded.
	_, _ = rand.Read(raw)
	return hex.EncodeToString(raw)
}
// cloneModel copies m and, when m has labels, gives the copy its own
// Labels map so the two values do not share label storage.
func cloneModel(m bqtypes.Model) bqtypes.Model {
	dup := m
	if len(m.Labels) == 0 {
		return dup
	}
	labels := make(map[string]string, len(m.Labels))
	maps.Copy(labels, m.Labels)
	dup.Labels = labels
	return dup
}
package query
import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"
"time"
)
// backtickDecoratedRE matches `project.dataset.table@123` or `dataset.table@-3600000`.
// Capture group 1 is the table path inside the backticks; group 2 is the
// signed decimal decorator that follows the '@'.
var backtickDecoratedRE = regexp.MustCompile("`([^`]+)@(-?[0-9]+)`")
// LowerTableDecorators rewrites BigQuery table time decorators embedded
// in backtick table paths into FOR SYSTEM_TIME AS OF clauses, matching
// the engine's historical read path. Relative offsets (@-3600000) are
// resolved against the current UTC clock at rewrite time.
//
// Blank input is returned unchanged. An error is returned when the
// statement combines a decorator with an explicit FOR SYSTEM_TIME AS OF
// clause. Decorators whose number cannot be parsed are left as written.
func LowerTableDecorators(sql string) (string, error) {
	candidate := strings.TrimSpace(sql)
	if candidate == "" {
		return sql, nil
	}
	if hasDecoratorConflict(candidate) {
		return "", errors.New("cannot use table decorator with FOR SYSTEM_TIME AS OF")
	}

	lower := func(ref string) string {
		groups := backtickDecoratedRE.FindStringSubmatch(ref)
		if len(groups) != 3 {
			return ref
		}
		path, decorator := groups[1], groups[2]
		millis, err := resolveDecoratorEpoch(decorator)
		if err != nil {
			return ref
		}
		return fmt.Sprintf("`%s` FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)", path, millis)
	}
	return backtickDecoratedRE.ReplaceAllStringFunc(sql, lower), nil
}
// hasDecoratorConflict reports whether sql contains both an explicit
// FOR SYSTEM_TIME AS OF clause (matched case-insensitively) and a
// backtick or legacy-bracket table decorator.
func hasDecoratorConflict(sql string) bool {
	if strings.Contains(strings.ToUpper(sql), "FOR SYSTEM_TIME AS OF") {
		return backtickDecoratedRE.MatchString(sql) ||
			legacyBracketDecoratorRE.MatchString(sql)
	}
	return false
}
// resolveDecoratorEpoch converts a decorator number into epoch
// milliseconds. A value starting with '-' is an offset added to the
// current UTC time; anything else is returned as parsed. Parse failures
// are reported through the error result.
func resolveDecoratorEpoch(raw string) (int64, error) {
	value, err := strconv.ParseInt(raw, 10, 64)
	if !strings.HasPrefix(raw, "-") {
		// Absolute timestamp: pass ParseInt's results straight through.
		return value, err
	}
	if err != nil {
		return 0, err
	}
	return time.Now().UTC().UnixMilli() + value, nil
}
package query
import (
"context"
"errors"
"fmt"
"regexp"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/load"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
)
// MetadataStore mirrors the handlers.MetadataStore surface needed to
// stash REST-only destination table metadata without an import cycle.
type MetadataStore interface {
// MergeTable applies patch to the metadata recorded for the table
// identified by projectID, datasetID and tableID.
MergeTable(projectID, datasetID, tableID string, patch bqtypes.Table)
}
// implicitDestDatasetID is the dataset used for implicit query result
// tables when the caller supplies no default dataset.
const implicitDestDatasetID = "_bqemu_query_results"
// nonIdentRE matches runs of characters other than ASCII letters,
// digits and underscore.
var nonIdentRE = regexp.MustCompile(`[^a-zA-Z0-9_]+`)
// Write dispositions recognised for query destination tables.
const (
writeTruncate = "WRITE_TRUNCATE"
writeEmpty = "WRITE_EMPTY"
writeAppend = "WRITE_APPEND"
)
// AppendResultsFromQueryRequest writes jobs.query output into an
// explicit destinationTable when the synchronous QueryRequest carries
// destination metadata (authViewTutorial, client.query with destination).
//
// It is a no-op when req is nil or names no destination table ID;
// otherwise the request's destination, write disposition and schema
// update options are forwarded to AppendResults.
func AppendResultsFromQueryRequest(ctx context.Context, catalog enginepb.CatalogClient,
	req *bqtypes.QueryRequest, projectID string,
	resultSchema *bqtypes.TableSchema, rows []bqtypes.Row,
) error {
	hasDestination := req != nil &&
		req.DestinationTable != nil &&
		req.DestinationTable.TableID != ""
	if !hasDestination {
		return nil
	}
	return AppendResults(ctx, catalog, &jobs.JobConfigurationQuery{
		DestinationTable:    req.DestinationTable,
		WriteDisposition:    req.WriteDisposition,
		SchemaUpdateOptions: req.SchemaUpdateOptions,
	}, projectID, resultSchema, rows)
}
// AppendResults writes synchronous query output into
// configuration.query.destinationTable when the job requests a
// destination table. Schema update options on WRITE_APPEND merge new
// columns or relax REQUIRED fields before rows are inserted.
//
// A missing configuration or destination table ID makes the call a
// no-op. An empty write disposition is treated as WRITE_TRUNCATE, and an
// empty destination project falls back to projectID.
func AppendResults(ctx context.Context, catalog enginepb.CatalogClient,
	cfg *jobs.JobConfigurationQuery, projectID string,
	resultSchema *bqtypes.TableSchema, rows []bqtypes.Row,
) error {
	if cfg == nil || cfg.DestinationTable == nil || cfg.DestinationTable.TableID == "" {
		return nil
	}
	if catalog == nil {
		return errors.New("query destination requires Catalog client")
	}

	dest := cfg.DestinationTable
	disposition := cfg.WriteDisposition
	if disposition == "" {
		disposition = writeTruncate
	}
	project := dest.ProjectID
	if project == "" {
		project = projectID
	}
	dataset, table := dest.DatasetID, dest.TableID

	engineRef := &enginepb.TableRef{
		ProjectId: project,
		DatasetId: dataset,
		TableId:   table,
	}
	protoResult := load.SchemaToProto(resultSchema)
	if err := load.EnsureDataset(ctx, catalog, project, dataset); err != nil {
		return err
	}

	// An empty result only needs the table (and possibly its schema)
	// prepared; there is nothing to insert.
	if len(rows) == 0 {
		return appendEmptyQueryDestination(ctx, catalog, cfg, disposition, project, dataset, table,
			engineRef, resultSchema, protoResult)
	}

	insertSchema, err := resolveDestinationProtoSchema(ctx, catalog, cfg, disposition, project, dataset,
		table, engineRef, resultSchema, protoResult)
	if err != nil {
		return err
	}
	return insertDestinationRows(ctx, catalog, project, dataset, table, insertSchema,
		resultSchema, rows)
}
// appendEmptyQueryDestination prepares the destination table for a query
// that produced no rows.
//
//   - WRITE_TRUNCATE / WRITE_EMPTY: ensure the destination table with the
//     result schema.
//   - WRITE_APPEND: nothing to do unless schema update options were
//     supplied, in which case the table is ensured and the schema update
//     applied.
//   - anything else: rejected with the same "not supported" error that
//     resolveDestinationProtoSchema returns, so that validation of the
//     write disposition does not depend on whether the query happened to
//     return rows.
func appendEmptyQueryDestination(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	cfg *jobs.JobConfigurationQuery,
	wd, destProject, destDataset, destTable string,
	tableRef *enginepb.TableRef,
	resultSchema *bqtypes.TableSchema,
	protoResult *enginepb.TableSchema,
) error {
	switch wd {
	case writeTruncate, writeEmpty:
		return load.EnsureDestinationTable(ctx, catalog, destProject, destDataset, destTable,
			wd, protoResult)
	case writeAppend:
		if len(cfg.SchemaUpdateOptions) == 0 {
			return nil
		}
		if err := load.EnsureDestinationTable(ctx, catalog, destProject, destDataset, destTable,
			writeAppend, protoResult); err != nil {
			return fmt.Errorf("ensure query destination table: %w", err)
		}
		if _, err := load.ApplySchemaUpdate(ctx, catalog, tableRef, resultSchema, cfg.SchemaUpdateOptions); err != nil {
			return err
		}
		return nil
	default:
		return fmt.Errorf("query destination writeDisposition %q is not supported", wd)
	}
}
// resolveDestinationProtoSchema ensures the destination table exists for
// the given write disposition and returns the proto schema to insert
// rows with.
//
// For WRITE_APPEND the schema comes from load.ApplySchemaUpdate; for
// WRITE_TRUNCATE and WRITE_EMPTY it is the query result schema. If that
// leaves no schema, the table is described and its schema used. Any
// other write disposition is rejected.
func resolveDestinationProtoSchema(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	cfg *jobs.JobConfigurationQuery,
	wd, destProject, destDataset, destTable string,
	tableRef *enginepb.TableRef,
	resultSchema *bqtypes.TableSchema,
	protoResult *enginepb.TableSchema,
) (*enginepb.TableSchema, error) {
	supported := wd == writeAppend || wd == writeTruncate || wd == writeEmpty
	if !supported {
		return nil, fmt.Errorf("query destination writeDisposition %q is not supported", wd)
	}

	if err := load.EnsureDestinationTable(ctx, catalog, destProject, destDataset, destTable,
		wd, protoResult); err != nil {
		return nil, fmt.Errorf("ensure query destination table: %w", err)
	}

	resolved := protoResult
	if wd == writeAppend {
		updated, err := load.ApplySchemaUpdate(ctx, catalog, tableRef, resultSchema, cfg.SchemaUpdateOptions)
		if err != nil {
			return nil, err
		}
		resolved = updated
	}
	if resolved != nil {
		return resolved, nil
	}

	// No schema in hand: fall back to whatever the catalog reports.
	described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
	if err != nil {
		return nil, fmt.Errorf("describe destination table: %w", err)
	}
	return described.GetSchema(), nil
}
// insertDestinationRows converts the REST-shaped result rows to maps and
// inserts them into the destination table through a seed catalog applier.
func insertDestinationRows(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	destProject, destDataset, destTable string,
	protoSchema *enginepb.TableSchema,
	resultSchema *bqtypes.TableSchema,
	rows []bqtypes.Row,
) error {
	payload := restRowsToMaps(resultSchema, rows)
	target := seed.TableRef{
		ProjectID: destProject,
		DatasetID: destDataset,
		TableID:   destTable,
	}
	applier := seed.NewCatalogApplier(catalog)
	_, err := applier.InsertRows(ctx, target, protoSchema, payload)
	if err != nil {
		return fmt.Errorf("query destination insert rows: %w", err)
	}
	return nil
}
// PersistDestinationMetadata stashes REST-only destination metadata
// (clustering, CMEK, time partitioning) so tables.get round-trips what
// the query job supplied.
//
// Nothing is stored when store, cfg or the destination table is nil, or
// when the job carries none of the three metadata fields. An empty
// destination project falls back to projectID.
func PersistDestinationMetadata(store MetadataStore, cfg *jobs.JobConfigurationQuery, projectID string) {
	if store == nil || cfg == nil || cfg.DestinationTable == nil {
		return
	}
	hasMetadata := cfg.Clustering != nil ||
		cfg.TimePartitioning != nil ||
		cfg.DestinationEncryptionConfiguration != nil
	if !hasMetadata {
		return
	}

	dest := cfg.DestinationTable
	owner := dest.ProjectID
	if owner == "" {
		owner = projectID
	}
	store.MergeTable(owner, dest.DatasetID, dest.TableID, bqtypes.Table{
		Clustering:              cfg.Clustering,
		TimePartitioning:        cfg.TimePartitioning,
		EncryptionConfiguration: cfg.DestinationEncryptionConfiguration,
	})
}
// MaterializeImplicitDestination registers an anonymous results table for
// SELECT jobs that omit destinationTable so clients can read
// query_job.destination and list_rows for pagination samples.
//
// The table is named after the sanitized job ID and lives in
// defaultDatasetID, or in the implicit results dataset when that is
// blank. A nil catalog, nil schema or empty row set is an error.
func MaterializeImplicitDestination(ctx context.Context, catalog enginepb.CatalogClient,
	projectID, defaultDatasetID, jobID string,
	resultSchema *bqtypes.TableSchema, rows []bqtypes.Row,
) (*bqtypes.TableReference, error) {
	if catalog == nil || resultSchema == nil || len(rows) == 0 {
		return nil, errors.New("implicit destination requires catalog, schema, and rows")
	}

	dataset := strings.TrimSpace(defaultDatasetID)
	if dataset == "" {
		dataset = implicitDestDatasetID
	}
	table := sanitizeJobTableID(jobID)

	if err := load.EnsureDataset(ctx, catalog, projectID, dataset); err != nil {
		return nil, err
	}
	protoSchema := load.SchemaToProto(resultSchema)
	err := load.EnsureDestinationTable(ctx, catalog, projectID, dataset, table,
		writeTruncate, protoSchema)
	if err != nil {
		return nil, err
	}

	target := seed.TableRef{ProjectID: projectID, DatasetID: dataset, TableID: table}
	applier := seed.NewCatalogApplier(catalog)
	payload := restRowsToMaps(resultSchema, rows)
	if _, err := applier.InsertRows(ctx, target, protoSchema, payload); err != nil {
		return nil, err
	}

	destination := &bqtypes.TableReference{
		ProjectID: projectID,
		DatasetID: dataset,
		TableID:   table,
	}
	return destination, nil
}
// sanitizeJobTableID turns a job ID into a table ID by replacing every
// run of characters outside [a-zA-Z0-9_] with a single underscore. An
// empty result falls back to "query_results".
func sanitizeJobTableID(jobID string) string {
	if cleaned := nonIdentRE.ReplaceAllString(jobID, "_"); cleaned != "" {
		return cleaned
	}
	return "query_results"
}
// restRowsToMaps converts positional REST rows into one name-keyed map
// per row, pairing each schema field with the cell at the same index.
// Fields beyond the end of a short row are omitted. A nil schema or an
// empty row set yields nil.
func restRowsToMaps(schema *bqtypes.TableSchema, rows []bqtypes.Row) []map[string]any {
	if schema == nil || len(rows) == 0 {
		return nil
	}
	converted := make([]map[string]any, len(rows))
	for r := range rows {
		cells := rows[r].F
		record := make(map[string]any, len(schema.Fields))
		paired := min(len(schema.Fields), len(cells))
		for i := 0; i < paired; i++ {
			field := schema.Fields[i]
			record[field.Name] = restFieldValue(field, cells[i])
		}
		converted[r] = record
	}
	return converted
}
// restFieldValue converts a REST query-result cell into the map-shaped
// value seed.InsertRows expects. STRUCT columns arrive as nested Row
// objects (positional f/v), not map[string]any.
//
// A STRUCT/RECORD cell holding a Row becomes a map keyed by sub-field
// name; a REPEATED cell holding a []Cell becomes a []any whose elements
// are converted with the same field definition minus its mode. Every
// other cell value is returned unchanged.
func restFieldValue(f bqtypes.TableFieldSchema, c bqtypes.Cell) any {
	if isRESTStructFieldType(f.Type) {
		if record, isRow := c.V.(bqtypes.Row); isRow {
			fields := make(map[string]any, len(f.Fields))
			for idx := range f.Fields {
				if idx >= len(record.F) {
					break
				}
				child := f.Fields[idx]
				fields[child.Name] = restFieldValue(child, record.F[idx])
			}
			return fields
		}
	}

	if !strings.EqualFold(f.Mode, "REPEATED") {
		return c.V
	}
	cells, isList := c.V.([]bqtypes.Cell)
	if !isList {
		return c.V
	}

	// Elements share the field's type but are not themselves repeated.
	element := f
	element.Mode = ""
	values := make([]any, 0, len(cells))
	for _, cell := range cells {
		values = append(values, restFieldValue(element, cell))
	}
	return values
}
// isRESTStructFieldType reports whether t names a STRUCT or RECORD
// column type, ignoring case and surrounding whitespace.
func isRESTStructFieldType(t string) bool {
	kind := strings.ToUpper(strings.TrimSpace(t))
	return kind == "STRUCT" || kind == "RECORD"
}
package query
import (
"context"
"errors"
"fmt"
"regexp"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/jobs"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
)
// infoSchemaJobsFromRE matches, case-insensitively, a backtick-quoted
// table path whose final quoted segment contains
// INFORMATION_SCHEMA.JOBS or INFORMATION_SCHEMA.JOBS_BY_PROJECT. At
// least one backtick-quoted segment followed by a dot must precede it.
var infoSchemaJobsFromRE = regexp.MustCompile(
"(?i)`(?:[^`]+`\\.)+`[^`]*INFORMATION_SCHEMA\\.(?:JOBS_BY_PROJECT|JOBS)`?")
// ReferencesInfoSchemaJobs reports whether sql reads from JOBS / JOBS_BY_PROJECT.
// The check is purely textual: it is true when infoSchemaJobsFromRE
// matches anywhere in sql.
func ReferencesInfoSchemaJobs(sql string) bool {
return infoSchemaJobsFromRE.MatchString(sql)
}
// RewriteInfoSchemaJobsSQL replaces INFORMATION_SCHEMA.JOBS* table refs with
// the gateway materialized catalog table for the request project.
//
// The replacement is inserted literally. projectID is caller-supplied,
// so it must not be run through regexp template expansion, where a "$"
// would be interpreted as a capture-group reference.
func RewriteInfoSchemaJobsSQL(sql, projectID string) string {
	repl := fmt.Sprintf("`%s`.`%s`.`%s`", projectID, jobs.InfoSchemaJobsDataset, jobs.InfoSchemaJobsTable)
	return infoSchemaJobsFromRE.ReplaceAllLiteralString(sql, repl)
}
// PrepareInfoSchemaJobsSnapshot registers/refreshes the internal jobs table
// before forwarding a rewritten query to the engine.
//
// The snapshot is rebuilt from scratch: the dataset is ensured, an
// existing table is dropped, the table is recreated with the jobs
// schema, and the rows produced from reg for projectID are inserted.
// Both catalog and reg are required.
func PrepareInfoSchemaJobsSnapshot(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	reg *jobs.Registry,
	projectID string,
) error {
	switch {
	case catalog == nil:
		return errors.New("info schema jobs: engine catalog required")
	case reg == nil:
		return errors.New("info schema jobs: job registry required")
	}

	applier := seed.NewCatalogApplier(catalog)
	if _, err := applier.EnsureDataset(ctx, projectID, jobs.InfoSchemaJobsDataset, "US"); err != nil {
		return err
	}

	seedRef := seed.TableRef{
		ProjectID: projectID,
		DatasetID: jobs.InfoSchemaJobsDataset,
		TableID:   jobs.InfoSchemaJobsTable,
	}
	schema := jobs.InfoSchemaJobsSchema()
	engineRef := &enginepb.TableRef{
		ProjectId: projectID,
		DatasetId: jobs.InfoSchemaJobsDataset,
		TableId:   jobs.InfoSchemaJobsTable,
	}

	// A successful describe means a previous snapshot exists; drop it so
	// the table is recreated empty. A describe error is taken to mean
	// there is nothing to drop.
	_, describeErr := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: engineRef})
	if describeErr == nil {
		_, dropErr := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: engineRef})
		if dropErr != nil {
			return fmt.Errorf("info schema jobs drop: %w", dropErr)
		}
	}

	if _, err := applier.EnsureTable(ctx, seedRef, schema); err != nil {
		return err
	}

	snapshot := jobs.InfoSchemaJobRows(reg, projectID)
	if len(snapshot) == 0 {
		return nil
	}
	if _, err := applier.InsertRows(ctx, seedRef, schema, snapshot); err != nil {
		return fmt.Errorf("info schema jobs insert: %w", err)
	}
	return nil
}
// PrepareEngineSQLForJobs rewrites JOBS* queries and refreshes the snapshot table.
// SQL that does not reference INFORMATION_SCHEMA.JOBS* is returned
// unchanged and the snapshot is left alone.
func PrepareEngineSQLForJobs(
	ctx context.Context,
	catalog enginepb.CatalogClient,
	reg *jobs.Registry,
	projectID, sql string,
) (string, error) {
	if ReferencesInfoSchemaJobs(sql) {
		err := PrepareInfoSchemaJobsSnapshot(ctx, catalog, reg, projectID)
		if err != nil {
			return "", err
		}
		return RewriteInfoSchemaJobsSQL(sql, projectID), nil
	}
	return sql, nil
}
package query
import (
"errors"
"fmt"
"regexp"
"strings"
)
// legacyBracketTableRE matches legacy SQL table references of the form
// [project:dataset.table] used by thirdparty Node/Python samples.
// Capture groups: 1 = project (may contain '-'), 2 = dataset, 3 = table.
var legacyBracketTableRE = regexp.MustCompile(
`\[([a-zA-Z0-9_-]+):([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)\]`)
// legacyBracketDecoratorRE matches legacy snapshot decorators
// [project:dataset.table@epoch] or [project:dataset.table@-offset].
// Capture groups: 1 = project, 2 = dataset, 3 = table, 4 = signed number.
var legacyBracketDecoratorRE = regexp.MustCompile(
`\[([a-zA-Z0-9_-]+):([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)@(-?[0-9]+)\]`)
// legacyBareTableRE matches [dataset.table] when no project is given.
// Capture groups: 1 = dataset, 2 = table.
var legacyBareTableRE = regexp.MustCompile(`\[([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)\]`)
// legacyBareDecoratorRE matches [dataset.table@epoch] without a project.
// Capture groups: 1 = dataset, 2 = table, 3 = signed number.
var legacyBareDecoratorRE = regexp.MustCompile(
`\[([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)@(-?[0-9]+)\]`)
// PrepareEngineSQL translates limited legacy SQL to GoogleSQL when
// useLegacy is true. The engine only accepts GoogleSQL; callers must
// clear UseLegacySql on the forwarded enginepb.QueryRequest.
func PrepareEngineSQL(useLegacy bool, sql, projectID, defaultDataset string) (string, error) {
if useLegacy {
normalized, err := NormalizeLegacySQL(sql, projectID, defaultDataset)
if err != nil {
return "", err
}
return LowerTableDecorators(normalized)
}
return LowerTableDecorators(sql)
}
// NormalizeLegacySQL rewrites bracket-style legacy table references to
// GoogleSQL backtick form. Full legacy SQL dialect is not supported.
//
// Rewrites are applied in this order:
//  1. [project:dataset.table@n] -> backtick path + FOR SYSTEM_TIME AS OF
//  2. [project:dataset.table]   -> backtick path
//  3. [dataset.table@n]         -> same as 1, qualified with projectID
//  4. [dataset.table]           -> same as 2, qualified with projectID
//
// Steps 3 and 4 require a non-blank projectID. An error is returned for
// blank SQL and when a decorator is combined with an explicit
// FOR SYSTEM_TIME AS OF clause. defaultDataset is currently unused.
func NormalizeLegacySQL(sql, projectID, defaultDataset string) (string, error) {
	if strings.TrimSpace(sql) == "" {
		return "", errors.New("legacy SQL query is empty")
	}
	if hasDecoratorConflict(sql) {
		return "", errors.New(
			"cannot use table decorator with FOR SYSTEM_TIME AS OF")
	}

	out := legacyBracketDecoratorRE.ReplaceAllStringFunc(sql, func(match string) string {
		parts := legacyBracketDecoratorRE.FindStringSubmatch(match)
		if len(parts) != 5 {
			return match
		}
		epoch, err := resolveDecoratorEpoch(parts[4])
		if err != nil {
			// Leave an unparsable decorator as written.
			return match
		}
		return fmt.Sprintf("`%s.%s.%s` FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)",
			parts[1], parts[2], parts[3], epoch)
	})

	// Every [project:dataset.table] match is replaced by bracket-free
	// text, so no such reference can remain afterwards; the former
	// re-check for leftovers could never fire and has been removed.
	out = legacyBracketTableRE.ReplaceAllStringFunc(out, func(match string) string {
		parts := legacyBracketTableRE.FindStringSubmatch(match)
		if len(parts) != 4 {
			return match
		}
		return fmt.Sprintf("`%s.%s.%s`", parts[1], parts[2], parts[3])
	})

	if legacyBareDecoratorRE.MatchString(out) {
		var err error
		out, err = normalizeLegacyBareDecorators(out, projectID)
		if err != nil {
			return "", err
		}
	}
	if legacyBareTableRE.MatchString(out) {
		var err error
		out, err = normalizeLegacyBareTables(out, projectID)
		if err != nil {
			return "", err
		}
	}
	_ = defaultDataset // reserved for future bare-table defaulting
	return out, nil
}
// normalizeLegacyBareDecorators rewrites every [dataset.table@n]
// reference in out to a project-qualified backtick path followed by
// FOR SYSTEM_TIME AS OF. A blank projectID is an error. References
// whose number cannot be resolved are left as written.
func normalizeLegacyBareDecorators(out, projectID string) (string, error) {
	owner := strings.TrimSpace(projectID)
	if owner == "" {
		return "", errors.New("legacy SQL [dataset.table@epoch] requires a project context")
	}

	qualify := func(ref string) string {
		groups := legacyBareDecoratorRE.FindStringSubmatch(ref)
		if len(groups) != 4 {
			return ref
		}
		millis, err := resolveDecoratorEpoch(groups[3])
		if err != nil {
			return ref
		}
		return fmt.Sprintf("`%s.%s.%s` FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)",
			owner, groups[1], groups[2], millis)
	}
	return legacyBareDecoratorRE.ReplaceAllStringFunc(out, qualify), nil
}
// normalizeLegacyBareTables rewrites every [dataset.table] reference in
// out to a backtick path qualified with projectID. A blank projectID is
// an error.
func normalizeLegacyBareTables(out, projectID string) (string, error) {
	owner := strings.TrimSpace(projectID)
	if owner == "" {
		return "", errors.New("legacy SQL [dataset.table] requires a project context")
	}

	qualify := func(ref string) string {
		groups := legacyBareTableRE.FindStringSubmatch(ref)
		if len(groups) != 3 {
			return ref
		}
		return fmt.Sprintf("`%s.%s.%s`", owner, groups[1], groups[2])
	}
	return legacyBareTableRE.ReplaceAllStringFunc(out, qualify), nil
}
package routines
import (
"strings"
"unicode"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// Routine type, language and SQL type-kind strings used while parsing
// CREATE FUNCTION / CREATE PROCEDURE DDL.
const (
// Routine types assigned from the CREATE header.
routineTypeScalarFunction = "SCALAR_FUNCTION"
routineTypeTableFunction = "TABLE_VALUED_FUNCTION"
routineTypeProcedure = "PROCEDURE"
// routineLanguageSQL is the language used when the DDL names neither
// PYTHON nor JS.
routineLanguageSQL = "SQL"
// Type kinds for compound and templated SQL types.
sqlTypeArray = "ARRAY"
sqlTypeStruct = "STRUCT"
sqlTypeAnyType = "ANY TYPE"
)
// RegisterFromDDL parses CREATE FUNCTION / CREATE PROCEDURE DDL and
// upserts the routine into store. Returns the target reference when
// registration succeeds, and nil when sql cannot be parsed as a routine
// definition.
func RegisterFromDDL(store *Store, projectID, defaultDatasetID, sql string) *bqtypes.RoutineReference {
	routine, parsed := parseCreateRoutineDDL(projectID, defaultDatasetID, sql)
	if !parsed {
		return nil
	}
	store.Upsert(routine)

	// Hand back a copy so the caller does not alias the local routine value.
	registered := routine.RoutineReference
	return &registered
}
// ParseCreateRoutineDDL parses CREATE FUNCTION / PROCEDURE DDL into a
// Routine snapshot (used by REST/catalog round-trip helpers).
// It is the exported entry point for parseCreateRoutineDDL; the boolean
// result is false when sql could not be parsed.
func ParseCreateRoutineDDL(projectID, defaultDatasetID, sql string) (bqtypes.Routine, bool) {
return parseCreateRoutineDDL(projectID, defaultDatasetID, sql)
}
// routineLanguageFromDDL infers the routine language from the DDL text.
// "LANGUAGE PYTHON" is checked before "LANGUAGE JS" (both
// case-insensitive); anything else is treated as SQL.
func routineLanguageFromDDL(sql string) bqtypes.RoutineLanguage {
	folded := strings.ToUpper(sql)
	if strings.Contains(folded, "LANGUAGE PYTHON") {
		return bqtypes.RoutineLanguage("PYTHON")
	}
	if strings.Contains(folded, "LANGUAGE JS") {
		return bqtypes.RoutineLanguage("JS")
	}
	return routineLanguageSQL
}
// parseCreateRoutineDDL turns CREATE FUNCTION / TABLE FUNCTION /
// PROCEDURE DDL into a Routine snapshot. It reads the header, the
// routine name and the signature in turn; the boolean result is false
// as soon as any of those steps fails.
func parseCreateRoutineDDL(projectID, defaultDatasetID, sql string) (bqtypes.Routine, bool) {
	afterHeader, kind, recognised := stripCreateRoutineHeader(sql)
	if !recognised {
		return bqtypes.Routine{}, false
	}
	qualified, afterName, named := parseQuotedName(afterHeader)
	if !named {
		return bqtypes.Routine{}, false
	}
	pID, dID, rID := splitRoutineName(projectID, defaultDatasetID, qualified)

	arguments, returns, definition, valid := parseRoutineSignature(afterName)
	if !valid {
		return bqtypes.Routine{}, false
	}

	stamp := nowMillis()
	routine := bqtypes.Routine{
		Etag: MintEtag(),
		RoutineReference: bqtypes.RoutineReference{
			ProjectID: pID,
			DatasetID: dID,
			RoutineID: rID,
		},
		RoutineType:      bqtypes.RoutineType(kind),
		Language:         routineLanguageFromDDL(sql),
		Arguments:        arguments,
		ReturnType:       returns,
		DefinitionBody:   definition,
		CreationTime:     stamp,
		LastModifiedTime: stamp,
	}

	// Python options are attached only when the DDL actually carries some.
	if pyOpts := parsePythonOptionsFromDDL(sql); pyOpts != nil {
		routine.PythonOptions = pyOpts
	}
	return routine, true
}
// stripCreateRoutineHeader recognises the CREATE [OR REPLACE]
// FUNCTION / TABLE FUNCTION / PROCEDURE header at the start of sql.
// It returns the text after the header (trimmed) and the routine type.
// ok is false when no supported header is present.
func stripCreateRoutineHeader(sql string) (rest, routineType string, ok bool) {
	statement := strings.TrimSpace(sql)
	folded := strings.ToUpper(statement)

	// Step 1: classify the routine from the upper-cased text.
	classes := [...]struct {
		replace, create, kind string
	}{
		{"CREATE OR REPLACE FUNCTION", "CREATE FUNCTION", routineTypeScalarFunction},
		{"CREATE OR REPLACE TABLE FUNCTION", "CREATE TABLE FUNCTION", routineTypeTableFunction},
		{"CREATE OR REPLACE PROCEDURE", "CREATE PROCEDURE", routineTypeProcedure},
	}
	kind := ""
	for _, class := range classes {
		if strings.HasPrefix(folded, class.replace) || strings.HasPrefix(folded, class.create) {
			kind = class.kind
			break
		}
	}
	if kind == "" {
		return "", "", false
	}

	// Step 2: cut the header off the original text, comparing
	// case-insensitively against each known header in turn.
	headers := [...]string{
		"CREATE OR REPLACE TABLE FUNCTION",
		"CREATE TABLE FUNCTION",
		"CREATE OR REPLACE FUNCTION",
		"CREATE FUNCTION",
		"CREATE OR REPLACE PROCEDURE",
		"CREATE PROCEDURE",
	}
	for _, header := range headers {
		if len(statement) < len(header) {
			continue
		}
		if strings.EqualFold(statement[:len(header)], header) {
			return strings.TrimSpace(statement[len(header):]), kind, true
		}
	}
	return "", "", false
}
// skipLanguageAndOptions drops any leading LANGUAGE <name> and
// OPTIONS(...) clauses, in any order and any number, and returns what
// follows. An OPTIONS keyword that is not followed by '(' stops the
// scan with the keyword already consumed.
func skipLanguageAndOptions(rest string) string {
	remaining := strings.TrimSpace(rest)
	for {
		folded := strings.ToUpper(remaining)
		switch {
		case strings.HasPrefix(folded, "LANGUAGE"):
			remaining = strings.TrimSpace(remaining[len("LANGUAGE"):])
			// Skip the language name: everything up to the next
			// whitespace byte.
			skip := 0
			for skip < len(remaining) && !unicode.IsSpace(rune(remaining[skip])) {
				skip++
			}
			remaining = strings.TrimSpace(remaining[skip:])
		case strings.HasPrefix(folded, "OPTIONS"):
			remaining = strings.TrimSpace(remaining[len("OPTIONS"):])
			if !strings.HasPrefix(remaining, "(") {
				return remaining
			}
			_, remaining, _ = scanBalanced(remaining, '(', ')')
			remaining = strings.TrimSpace(remaining)
		default:
			return remaining
		}
	}
}
// parseRoutineSignature parses the part of a routine definition that
// follows its name: the parenthesised argument list, an optional
// RETURNS type, optional LANGUAGE / OPTIONS clauses, and the mandatory
// AS body.
//
// ok is false when the argument list is missing or unbalanced, the
// RETURNS type cannot be read, the AS keyword is absent, or the body is
// empty. An argument list that fails to parse is not fatal; args is
// simply nil in that case.
func parseRoutineSignature(rest string) (args []bqtypes.RoutineArgument,
	returnType *bqtypes.StandardSqlDataType, body string, ok bool,
) {
	if !strings.HasPrefix(rest, "(") {
		return nil, nil, "", false
	}
	argList, remainder, balanced := scanBalanced(rest, '(', ')')
	if !balanced {
		return nil, nil, "", false
	}
	parsedArgs, _ := parseArgumentList(strings.TrimSpace(argList))
	remainder = strings.TrimSpace(remainder)

	var declared *bqtypes.StandardSqlDataType
	if strings.HasPrefix(strings.ToUpper(remainder), "RETURNS") {
		remainder = strings.TrimSpace(remainder[len("RETURNS"):])
		parsedType, used, typed := scanSQLType(remainder)
		if !typed {
			return nil, nil, "", false
		}
		declared = parsedType
		remainder = strings.TrimSpace(remainder[used:])
	}

	remainder = strings.TrimSpace(skipLanguageAndOptions(remainder))
	if !strings.HasPrefix(strings.ToUpper(remainder), "AS") {
		return nil, nil, "", false
	}
	definition, parsed := parseDefinitionBody(strings.TrimSpace(remainder[len("AS"):]))
	if !parsed || definition == "" {
		return nil, nil, "", false
	}
	return parsedArgs, declared, definition, true
}
// parseQuotedName reads the leading routine name from s.
//
// A backtick-quoted name is returned without its quotes. An unquoted
// name runs up to the first '(' or whitespace character. rest is the
// remaining text with surrounding whitespace removed. ok is false for
// empty input, an unterminated backtick quote, or input that starts
// with '('.
//
// The unquoted scan works on runes, not bytes: treating each byte as a
// rune would misread UTF-8 continuation bytes such as 0xA0 or 0x85 as
// whitespace and cut a non-ASCII name in the middle of a character.
func parseQuotedName(s string) (name, rest string, ok bool) {
	s = strings.TrimSpace(s)
	if s == "" {
		return "", "", false
	}
	if s[0] == '`' {
		quoted, after, closed := strings.Cut(s[1:], "`")
		if !closed {
			return "", "", false
		}
		return quoted, strings.TrimSpace(after), true
	}

	// Unquoted identifier: read until '(' or whitespace boundary.
	end := strings.IndexFunc(s, func(r rune) bool {
		return r == '(' || unicode.IsSpace(r)
	})
	switch {
	case end < 0:
		return s, "", true
	case end == 0:
		return "", "", false
	}
	return s[:end], strings.TrimSpace(s[end:]), true
}
// splitRoutineName resolves a dotted routine path into project, dataset
// and routine IDs. One segment uses both defaults, two segments supply
// the dataset, and three or more take the first two segments as project
// and dataset and the last segment as the routine.
func splitRoutineName(projectID, defaultDatasetID, name string) (project, dataset, routine string) {
	segments := strings.Split(name, ".")
	last := segments[len(segments)-1]
	if len(segments) == 1 {
		return projectID, defaultDatasetID, last
	}
	if len(segments) == 2 {
		return projectID, segments[0], last
	}
	return segments[0], segments[1], last
}
// scanBalanced extracts the text between s[0], which must be open, and
// its matching close delimiter, honouring nesting. It returns the inner
// text (delimiters excluded), the trimmed remainder after the closing
// delimiter, and ok=false when s does not start with open or the
// delimiters never balance.
//
// Only open and close are counted. '<' and '>' are deliberately not
// tracked: well-formed type expressions such as STRUCT<a NUMERIC(10, 2)>
// already have balanced parentheses, while counting '<' as an opening
// bracket made any comparison in a routine body, e.g. "(x < 10)",
// impossible to close.
func scanBalanced(s string, open, close byte) (inner, rest string, ok bool) {
	if len(s) == 0 || s[0] != open {
		return "", "", false
	}
	depth := 0
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case open:
			depth++
		case close:
			depth--
			if depth == 0 {
				return s[1:i], strings.TrimSpace(s[i+1:]), true
			}
		}
	}
	return "", "", false
}
// parseArgumentList parses a comma-separated "name TYPE" list (the text
// between a routine's parentheses) into routine arguments. An empty
// list yields (nil, true). The boolean is false when an argument name
// or type cannot be read, or when arguments are not separated by commas.
func parseArgumentList(raw string) ([]bqtypes.RoutineArgument, bool) {
	// Names are scanned byte-wise: letters, digits and underscore.
	isIdent := func(b byte) bool {
		r := rune(b)
		return unicode.IsLetter(r) || unicode.IsDigit(r) || b == '_'
	}

	var parsed []bqtypes.RoutineArgument
	pending := strings.TrimSpace(raw)
	for pending != "" {
		width := 0
		for width < len(pending) && isIdent(pending[width]) {
			width++
		}
		if width == 0 {
			return nil, false
		}
		argName := pending[:width]
		pending = strings.TrimSpace(pending[width:])

		argType, used, typed := scanSQLType(pending)
		if !typed {
			return nil, false
		}
		parsed = append(parsed, bqtypes.RoutineArgument{
			Name:     argName,
			DataType: argType,
		})

		pending = strings.TrimSpace(pending[used:])
		if pending == "" {
			break
		}
		if pending[0] != ',' {
			return nil, false
		}
		pending = strings.TrimSpace(pending[1:])
	}
	return parsed, true
}
// scanSQLType reads one SQL type from the start of s (after trimming
// surrounding whitespace). It returns the parsed type, the number of
// bytes of the trimmed input that the type occupied, and whether a type
// was found.
//
// Recognised forms, matched case-insensitively: "ANY TYPE",
// "ARRAY<elem>", "STRUCT<fields>", and a plain identifier made of
// letters, digits and underscores, whose upper-cased text becomes the
// type kind.
func scanSQLType(s string) (*bqtypes.StandardSqlDataType, int, bool) {
	text := strings.TrimSpace(s)
	if text == "" {
		return nil, 0, false
	}
	folded := strings.ToUpper(text)

	if strings.HasPrefix(folded, "ANY TYPE") {
		templated := &bqtypes.StandardSqlDataType{
			TypeKind: bqtypes.SQLTypeKind("ANY TYPE"),
		}
		return templated, len("ANY TYPE"), true
	}

	if strings.HasPrefix(folded, "ARRAY<") {
		body, used, closed := scanAngleInner(text[len("ARRAY<"):])
		if !closed {
			return nil, 0, false
		}
		element, _, valid := scanSQLType(body)
		if !valid {
			return nil, 0, false
		}
		array := &bqtypes.StandardSqlDataType{
			TypeKind:         bqtypes.SQLTypeKind(sqlTypeArray),
			ArrayElementType: element,
		}
		return array, len("ARRAY<") + used, true
	}

	if strings.HasPrefix(folded, "STRUCT<") {
		body, used, closed := scanAngleInner(text[len("STRUCT<"):])
		if !closed {
			return nil, 0, false
		}
		members, valid := parseStructFields(body)
		if !valid {
			return nil, 0, false
		}
		record := &bqtypes.StandardSqlDataType{
			TypeKind: bqtypes.SQLTypeKind(sqlTypeStruct),
			StructType: &bqtypes.StandardSqlStructType{
				Fields: members,
			},
		}
		return record, len("STRUCT<") + used, true
	}

	// Plain type name, scanned byte-wise.
	width := 0
	for width < len(text) {
		b := text[width]
		if !unicode.IsLetter(rune(b)) && !unicode.IsDigit(rune(b)) && b != '_' {
			break
		}
		width++
	}
	if width == 0 {
		return nil, 0, false
	}
	scalar := &bqtypes.StandardSqlDataType{
		TypeKind: bqtypes.SQLTypeKind(strings.ToUpper(text[:width])),
	}
	return scalar, width, true
}
// scanAngleInner expects s to begin just after an opening '<'. It
// returns the text up to the matching '>' (nested angle brackets are
// balanced), the number of bytes consumed including that '>', and
// ok=false when the bracket is never closed.
func scanAngleInner(s string) (inner string, consumed int, ok bool) {
	open := 1
	for pos := 0; pos < len(s); pos++ {
		if s[pos] == '<' {
			open++
			continue
		}
		if s[pos] != '>' {
			continue
		}
		open--
		if open == 0 {
			return s[:pos], pos + 1, true
		}
	}
	return "", 0, false
}
// parseStructFields parses the comma-separated "name TYPE" list found
// inside STRUCT<...> into struct fields. Blank input yields (nil, true).
// The boolean is false when a field name or type cannot be read, or
// when fields are not separated by commas.
func parseStructFields(raw string) ([]bqtypes.StandardSqlField, bool) {
	// Names are scanned byte-wise: letters, digits and underscore.
	isIdent := func(b byte) bool {
		r := rune(b)
		return unicode.IsLetter(r) || unicode.IsDigit(r) || b == '_'
	}

	var members []bqtypes.StandardSqlField
	pending := strings.TrimSpace(raw)
	for pending != "" {
		width := 0
		for width < len(pending) && isIdent(pending[width]) {
			width++
		}
		if width == 0 {
			return nil, false
		}
		fieldName := pending[:width]
		pending = strings.TrimSpace(pending[width:])

		fieldType, used, typed := scanSQLType(pending)
		if !typed {
			return nil, false
		}
		members = append(members, bqtypes.StandardSqlField{
			Name: fieldName,
			Type: *fieldType,
		})

		pending = strings.TrimSpace(pending[used:])
		if pending == "" {
			break
		}
		if pending[0] != ',' {
			return nil, false
		}
		pending = strings.TrimSpace(pending[1:])
	}
	return members, true
}
// parseDefinitionBody extracts a routine body from the text following
// the AS keyword.
//
//   - "( ... )": the trimmed text inside the balanced parentheses.
//   - triple-quoted string (''' or """): the text up to the next
//     matching triple quote, verbatim.
//   - single- or double-quoted string: the text up to the closing quote.
//     In an ordinary string a backslash is dropped and the following
//     character kept. In a raw string (r or R prefix) the backslash is
//     kept as well, because backslashes are not escape characters there;
//     a backslash still prevents the next character from closing the
//     string.
//   - anything else: s itself, trimmed.
//
// The boolean is false for empty input or an unterminated body.
func parseDefinitionBody(s string) (string, bool) {
	s = strings.TrimSpace(s)
	if len(s) == 0 {
		return "", false
	}
	if s[0] == '(' {
		inner, _, ok := scanBalanced(s, '(', ')')
		return strings.TrimSpace(inner), ok
	}

	// Language-specific quoted bodies (JavaScript / Python UDFs) — take
	// the first quoted span.
	raw := false
	if len(s) > 1 && (s[0] == 'r' || s[0] == 'R') && (s[1] == '\'' || s[1] == '"') {
		raw = true
		s = s[1:]
	}

	if len(s) >= 3 && (s[0] == '\'' || s[0] == '"') && s[0] == s[1] && s[1] == s[2] {
		delimiter := s[:3]
		end := strings.Index(s[3:], delimiter)
		if end < 0 {
			return "", false
		}
		return s[3 : 3+end], true
	}

	if s[0] == '\'' || s[0] == '"' {
		quote := s[0]
		var b strings.Builder
		for i := 1; i < len(s); i++ {
			c := s[i]
			switch {
			case c == '\\' && i+1 < len(s):
				// The character after a backslash never terminates the
				// string. Raw strings keep the backslash itself.
				if raw {
					b.WriteByte(c)
				}
				i++
				b.WriteByte(s[i])
			case c == quote:
				return b.String(), true
			default:
				b.WriteByte(c)
			}
		}
		// Ran off the end without a closing quote.
		return "", false
	}
	return s, true
}
package routines
import (
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// parsePythonOptionsFromDDL extracts the Python UDF options (entry_point
// and packages) from the first OPTIONS (...) clause found in sql.
//
// The clause body is split on top-level commas only: commas inside quoted
// strings or inside bracketed lists such as packages=['numpy', 'pandas'] do
// not separate options. Entries without '=' and unknown keys are ignored;
// when a key repeats, the last occurrence wins.
//
// It returns nil when there is no OPTIONS clause, when the clause is not a
// balanced parenthesised list, or when neither entry_point nor packages
// produced a value.
func parsePythonOptionsFromDDL(sql string) *bqtypes.PythonOptions {
	const keyword = "OPTIONS"
	pos := strings.Index(strings.ToUpper(sql), keyword)
	// ToUpper can change the byte length of non-ASCII text, so make sure
	// the offset found in the upper-cased copy still lies inside sql.
	if pos < 0 || pos+len(keyword) > len(sql) {
		return nil
	}
	rest := strings.TrimSpace(sql[pos+len(keyword):])
	if !strings.HasPrefix(rest, "(") {
		return nil
	}
	inner, _, ok := scanBalanced(rest, '(', ')')
	if !ok {
		return nil
	}

	opts := &bqtypes.PythonOptions{}
	// apply records one "key = value" entry on opts.
	apply := func(part string) {
		key, value, found := strings.Cut(strings.TrimSpace(part), "=")
		if !found {
			return
		}
		key = strings.TrimSpace(strings.Trim(key, `"'`))
		value = strings.TrimSpace(value)
		switch strings.ToUpper(key) {
		case "ENTRY_POINT":
			opts.EntryPoint = parseOptionStringLiteral(value)
		case "PACKAGES":
			opts.Packages = parseOptionStringArray(value)
		}
	}

	// Walk the clause once, cutting only at commas that are outside any
	// quoted string and outside any bracket/parenthesis nesting.
	var quote byte
	depth, start := 0, 0
	for i := 0; i < len(inner); i++ {
		c := inner[i]
		switch {
		case quote != 0:
			if c == '\\' {
				i++ // skip the escaped byte
			} else if c == quote {
				quote = 0
			}
		case c == '\'' || c == '"':
			quote = c
		case c == '[' || c == '(':
			depth++
		case c == ']' || c == ')':
			if depth > 0 {
				depth--
			}
		case c == ',' && depth == 0:
			apply(inner[start:i])
			start = i + 1
		}
	}
	apply(inner[start:])

	if opts.EntryPoint == "" && len(opts.Packages) == 0 {
		return nil
	}
	return opts
}
// parseOptionStringLiteral unquotes an option value. When the trimmed value
// starts and ends with the same quote character (' or "), exactly that pair
// is removed; otherwise every leading and trailing quote character of either
// kind is stripped.
func parseOptionStringLiteral(value string) string {
	trimmed := strings.TrimSpace(value)
	if n := len(trimmed); n >= 2 {
		switch first := trimmed[0]; first {
		case '\'', '"':
			if trimmed[n-1] == first {
				return trimmed[1 : n-1]
			}
		}
	}
	return strings.Trim(trimmed, `"'`)
}
// parseOptionStringArray parses a bracketed list literal such as
// ['numpy', 'pandas'] into its unquoted elements. Empty elements are
// skipped. It returns nil when value does not start with '[' or when the
// brackets are not balanced.
func parseOptionStringArray(value string) []string {
	trimmed := strings.TrimSpace(value)
	if len(trimmed) == 0 || trimmed[0] != '[' {
		return nil
	}
	body, _, balanced := scanBalanced(trimmed, '[', ']')
	if !balanced {
		return nil
	}
	var items []string
	for _, piece := range strings.Split(body, ",") {
		if item := strings.TrimSpace(piece); item != "" {
			items = append(items, parseOptionStringLiteral(item))
		}
	}
	return items
}
package routines
import (
"fmt"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// BuildDDLFromRoutine renders a CREATE OR REPLACE statement for rt. The
// routine is named `dataset.routine` (backtick-quoted, without the project).
// Table functions and procedures get their own statement shape; every other
// routine type is rendered as a scalar function.
func BuildDDLFromRoutine(rt bqtypes.Routine) string {
	qualified := fmt.Sprintf("`%s.%s`", rt.RoutineReference.DatasetID, rt.RoutineReference.RoutineID)
	kind := string(rt.RoutineType)
	if kind == routineTypeTableFunction {
		return buildTableFunctionDDL(qualified, rt)
	}
	if kind == routineTypeProcedure {
		return buildProcedureDDL(qualified, rt)
	}
	return buildScalarFunctionDDL(qualified, rt)
}
// buildScalarFunctionDDL renders
//
//	CREATE OR REPLACE FUNCTION name(args) [RETURNS t] [LANGUAGE l] [OPTIONS (...)] AS (body)
//
// RETURNS is emitted only when a return type is set, LANGUAGE only for a
// non-empty language other than SQL, and OPTIONS whenever PythonOptions is
// non-nil. Option values are single-quoted without escaping.
func buildScalarFunctionDDL(name string, rt bqtypes.Routine) string {
	var ddl strings.Builder
	ddl.WriteString("CREATE OR REPLACE FUNCTION " + name + "(" + formatArgumentList(rt.Arguments) + ")")
	if rt.ReturnType != nil {
		ddl.WriteString(" RETURNS " + formatSQLType(rt.ReturnType))
	}
	if lang := string(rt.Language); lang != "" && !strings.EqualFold(lang, routineLanguageSQL) {
		ddl.WriteString(" LANGUAGE " + lang)
	}
	if py := rt.PythonOptions; py != nil {
		var settings []string
		if py.EntryPoint != "" {
			settings = append(settings, "entry_point='"+py.EntryPoint+"'")
		}
		if len(py.Packages) > 0 {
			quoted := make([]string, len(py.Packages))
			for i, pkg := range py.Packages {
				quoted[i] = "'" + pkg + "'"
			}
			settings = append(settings, "packages=["+strings.Join(quoted, ", ")+"]")
		}
		ddl.WriteString(" OPTIONS (" + strings.Join(settings, ", ") + ")")
	}
	ddl.WriteString(" AS (" + rt.DefinitionBody + ")")
	return ddl.String()
}
// buildTableFunctionDDL renders
// "CREATE OR REPLACE TABLE FUNCTION name(args) AS (body)".
func buildTableFunctionDDL(name string, rt bqtypes.Routine) string {
	signature := name + "(" + formatArgumentList(rt.Arguments) + ")"
	return "CREATE OR REPLACE TABLE FUNCTION " + signature + " AS (" + rt.DefinitionBody + ")"
}
// buildProcedureDDL renders
// "CREATE OR REPLACE PROCEDURE name(args) BEGIN body END".
func buildProcedureDDL(name string, rt bqtypes.Routine) string {
	signature := name + "(" + formatArgumentList(rt.Arguments) + ")"
	return "CREATE OR REPLACE PROCEDURE " + signature + " BEGIN " + rt.DefinitionBody + " END"
}
// formatArgumentList renders routine arguments as "name TYPE, name TYPE".
// An argument without a data type is rendered with the any-type placeholder
// (formatSQLType returns it for a nil type). No arguments yield "".
func formatArgumentList(args []bqtypes.RoutineArgument) string {
	rendered := make([]string, len(args))
	for i := range args {
		rendered[i] = fmt.Sprintf("%s %s", args[i].Name, formatSQLType(args[i].DataType))
	}
	return strings.Join(rendered, ", ")
}
// formatSQLType renders t as SQL type text. A nil type becomes the any-type
// placeholder, an ARRAY with an element type becomes ARRAY<elem>, a STRUCT
// with a struct type becomes STRUCT<name type, ...>, and every other type
// is rendered as its TypeKind string.
func formatSQLType(t *bqtypes.StandardSqlDataType) string {
	if t == nil {
		return sqlTypeAnyType
	}
	kind := string(t.TypeKind)
	switch {
	case strings.EqualFold(kind, sqlTypeArray) && t.ArrayElementType != nil:
		return "ARRAY<" + formatSQLType(t.ArrayElementType) + ">"
	case strings.EqualFold(kind, sqlTypeStruct) && t.StructType != nil:
		var sb strings.Builder
		sb.WriteString("STRUCT<")
		for i := range t.StructType.Fields {
			field := &t.StructType.Fields[i]
			if i > 0 {
				sb.WriteString(", ")
			}
			sb.WriteString(field.Name + " " + formatSQLType(&field.Type))
		}
		sb.WriteString(">")
		return sb.String()
	}
	return kind
}
// Package routines is the gateway-side in-memory registry of BigQuery
// Routine resources (UDFs, TVFs, stored procedures). REST handlers
// and DDL query jobs register routines here so client libraries can
// round-trip insert/get/list/update/delete without an engine catalog RPC.
package routines
import (
"crypto/rand"
"encoding/hex"
"slices"
"strings"
"sync"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// Store holds routines keyed by projectId:datasetId.routineId (see
// routineKey). Its methods take mu around every access to the map and
// tolerate a nil *Store receiver.
type Store struct {
// mu guards routines.
mu sync.RWMutex
// routines maps a routineKey to the stored routine value.
routines map[string]bqtypes.Routine
}
// NewStore returns an empty routine registry whose map is ready for writes.
func NewStore() *Store {
	store := new(Store)
	store.routines = make(map[string]bqtypes.Routine)
	return store
}
// routineKey builds the registry key "projectID:datasetID.routineID".
func routineKey(projectID, datasetID, routineID string) string {
	var key strings.Builder
	key.Grow(len(projectID) + len(datasetID) + len(routineID) + 2)
	key.WriteString(projectID)
	key.WriteByte(':')
	key.WriteString(datasetID)
	key.WriteByte('.')
	key.WriteString(routineID)
	return key.String()
}
// Insert registers a new routine under the key derived from its
// RoutineReference. It returns false, leaving the store untouched, when the
// key is already present or the receiver is nil.
func (s *Store) Insert(rt bqtypes.Routine) bool {
	if s == nil {
		return false
	}
	key := routineKey(
		rt.RoutineReference.ProjectID,
		rt.RoutineReference.DatasetID,
		rt.RoutineReference.RoutineID,
	)
	s.mu.Lock()
	defer s.mu.Unlock()
	_, exists := s.routines[key]
	if !exists {
		s.routines[key] = cloneRoutine(rt)
	}
	return !exists
}
// Upsert stores rt under its key whether or not an entry already exists
// (CREATE OR REPLACE DDL). A nil receiver is a no-op.
func (s *Store) Upsert(rt bqtypes.Routine) {
	if s == nil {
		return
	}
	key := routineKey(
		rt.RoutineReference.ProjectID,
		rt.RoutineReference.DatasetID,
		rt.RoutineReference.RoutineID,
	)
	s.mu.Lock()
	defer s.mu.Unlock()
	s.routines[key] = cloneRoutine(rt)
}
// Get looks up a routine and reports whether it was found. The returned
// value is passed through cloneRoutine; on a miss (or a nil receiver) it is
// the zero Routine.
func (s *Store) Get(projectID, datasetID, routineID string) (bqtypes.Routine, bool) {
	if s == nil {
		return bqtypes.Routine{}, false
	}
	key := routineKey(projectID, datasetID, routineID)
	s.mu.RLock()
	defer s.mu.RUnlock()
	stored, found := s.routines[key]
	return cloneRoutine(stored), found
}
// Delete removes a routine and reports whether it was present. A nil
// receiver reports false.
func (s *Store) Delete(projectID, datasetID, routineID string) bool {
	if s == nil {
		return false
	}
	key := routineKey(projectID, datasetID, routineID)
	s.mu.Lock()
	defer s.mu.Unlock()
	_, present := s.routines[key]
	// delete is a no-op for an absent key, so no branch is needed.
	delete(s.routines, key)
	return present
}
// List returns the routines of one dataset ordered by registry key,
// optionally restricted to a routine type (filter format:
// routineType:SCALAR_FUNCTION). A filter that does not parse matches every
// type. A nil receiver returns nil; otherwise the result is non-nil even
// when empty.
func (s *Store) List(projectID, datasetID, filter string) []bqtypes.Routine {
	if s == nil {
		return nil
	}
	wantType := parseRoutineTypeFilter(filter)
	// An empty routine id yields exactly the "project:dataset." prefix.
	scope := routineKey(projectID, datasetID, "")

	s.mu.RLock()
	defer s.mu.RUnlock()

	matched := make([]string, 0, len(s.routines))
	for key, rt := range s.routines {
		if !strings.HasPrefix(key, scope) {
			continue
		}
		if wantType != "" && string(rt.RoutineType) != wantType {
			continue
		}
		matched = append(matched, key)
	}
	slices.Sort(matched)

	result := make([]bqtypes.Routine, len(matched))
	for i, key := range matched {
		result[i] = cloneRoutine(s.routines[key])
	}
	return result
}
// parseRoutineTypeFilter returns the whitespace-trimmed value of a
// "routineType:<value>" filter, or "" when filter does not start with that
// prefix.
func parseRoutineTypeFilter(filter string) string {
	value, ok := strings.CutPrefix(filter, "routineType:")
	if !ok {
		return ""
	}
	return strings.TrimSpace(value)
}
// cloneRoutine returns a copy of rt that does not share its argument list,
// argument/return type trees, or Python options with the input, so a caller
// mutating a value obtained from the Store (or one it passed in) cannot
// alter the stored routine.
//
// Only the reference-typed fields used in this package are detached:
// Arguments (and each argument's DataType), ReturnType and PythonOptions.
// Any other field is copied by plain assignment.
func cloneRoutine(rt bqtypes.Routine) bqtypes.Routine {
	// cloneType deep-copies a type tree (array element and struct fields).
	var cloneType func(t *bqtypes.StandardSqlDataType) *bqtypes.StandardSqlDataType
	cloneType = func(t *bqtypes.StandardSqlDataType) *bqtypes.StandardSqlDataType {
		if t == nil {
			return nil
		}
		dup := *t
		dup.ArrayElementType = cloneType(t.ArrayElementType)
		if t.StructType != nil {
			st := *t.StructType
			st.Fields = slices.Clone(t.StructType.Fields)
			for i := range st.Fields {
				st.Fields[i].Type = *cloneType(&st.Fields[i].Type)
			}
			dup.StructType = &st
		}
		return &dup
	}

	out := rt
	// slices.Clone keeps a nil slice nil, so "absent" stays distinguishable
	// from "empty".
	out.Arguments = slices.Clone(rt.Arguments)
	for i := range out.Arguments {
		out.Arguments[i].DataType = cloneType(out.Arguments[i].DataType)
	}
	out.ReturnType = cloneType(rt.ReturnType)
	if rt.PythonOptions != nil {
		py := *rt.PythonOptions
		py.Packages = slices.Clone(py.Packages)
		out.PythonOptions = &py
	}
	return out
}
// MintEtag returns a 16-character lowercase hex etag built from 8 random
// bytes read from crypto/rand.
func MintEtag() string {
	raw := make([]byte, 8)
	// The read error is deliberately ignored: an etag only needs to be
	// unlikely to repeat.
	_, _ = rand.Read(raw)
	return hex.EncodeToString(raw)
}
package routines
import (
"strconv"
"time"
)
// nowMillis returns the current Unix time in milliseconds as a base-10
// string.
func nowMillis() string {
	ms := time.Now().UnixMilli()
	return strconv.FormatInt(ms, 10)
}
package seed
import (
"net"
"net/http"
"strings"
)
// AccessConfig captures the safety knobs gated on the seed routes.
// The handler closes over one of these per gateway process; mutating
// the struct after registration has no effect. The zero value allows
// loopback callers only and performs no token check.
type AccessConfig struct {
// AllowRemote, when false (the default), rejects any request
// whose RemoteAddr is not loopback (127.0.0.0/8 or ::1). The
// rationale: a seed operation pulls down real production data
// and writes it into a local emulator -- the call must
// originate from the operator who owns both endpoints, not
// from a co-tenant reachable on the LAN.
AllowRemote bool
// Token, when non-empty, requires every request to carry a
// matching `X-BigQuery-Emulator-Seed-Token` header. This is
// the additional defense for the
// `--seed-api-allow-remote=true` case (CI runners, ephemeral
// VMs) where loopback enforcement is not viable. The token is
// checked whether or not AllowRemote is set.
Token string
}
// HeaderName is the canonical name of the request header that carries
// the seed token. Exported so tests don't have to duplicate the
// literal.
const HeaderName = "X-BigQuery-Emulator-Seed-Token"
// CheckAccess applies the loopback and token gates to r. It returns nil
// when the request is allowed and ErrAccessDenied otherwise, so the handler
// can map a denial straight to 403.
//
// The loopback gate runs first and only when AllowRemote is false; the
// token gate runs next and only when Token is non-empty. Because loopback
// is checked first, an operator who left `--seed-api-allow-remote` off but
// also sends a token is always rejected by the loopback gate.
func (c AccessConfig) CheckAccess(r *http.Request) error {
	if !c.AllowRemote && !isLoopback(r.RemoteAddr) {
		return ErrAccessDenied
	}
	if c.Token == "" {
		return nil
	}
	if secureEqual(r.Header.Get(HeaderName), c.Token) {
		return nil
	}
	return ErrAccessDenied
}
// ErrAccessDenied is the sentinel CheckAccess returns for every
// denial. We don't distinguish between "wrong remote" and "wrong
// token" so an attacker probing the seed endpoint can't tell which
// check fired. It carries HTTP status 403.
var ErrAccessDenied = httpError{code: http.StatusForbidden, msg: "seed: access denied"}
// httpError carries both the HTTP status the handler must write and
// the human-readable message. It implements error with a value
// receiver; both fields are comparable, so values can be compared
// with == (and therefore matched by errors.Is).
type httpError struct {
// code is the HTTP status code to respond with.
code int
// msg is the text returned by Error.
msg string
}
// Error returns the human-readable message.
func (e httpError) Error() string { return e.msg }
// Status returns the HTTP status code the handler should respond
// with for this error.
func (e httpError) Status() int { return e.code }
// secureEqual reports whether a and b are equal. Strings of different
// length are rejected immediately; for equal-length strings every byte is
// examined regardless of where the first difference occurs, so the running
// time does not reveal the position of a mismatch. Only the token
// comparison needs this property, but isolating the helper keeps the call
// site obvious.
func secureEqual(a, b string) bool {
	if len(a) != len(b) {
		return false
	}
	var acc byte
	for i := 0; i < len(a); i++ {
		acc |= a[i] ^ b[i]
	}
	return acc == 0
}
// isLoopback reports whether remoteAddr (in net/http's `host:port` form)
// is a loopback IP address. An address without a port is accepted too,
// because tests sometimes inject just an IP for httptest; this includes a
// bracketed IPv6 literal such as "[::1]".
//
// An empty host is treated as loopback so internal callers (e.g. a gateway
// that's bound to a unix socket) aren't locked out. Anything that is not an
// IP literal, including host names, is reported as non-loopback.
func isLoopback(remoteAddr string) bool {
	host, _, err := net.SplitHostPort(remoteAddr)
	if err != nil {
		// No port (or otherwise unsplittable): treat the whole value as
		// the host.
		host = remoteAddr
	}
	host = strings.TrimSpace(host)
	if host == "" {
		// Unix-socket / undefined caller.
		return true
	}
	// SplitHostPort strips the brackets of "[::1]:80", but a port-less
	// "[::1]" falls through the error path above with brackets intact.
	if len(host) >= 2 && host[0] == '[' && host[len(host)-1] == ']' {
		host = host[1 : len(host)-1]
	}
	ip := net.ParseIP(host)
	if ip == nil {
		return false
	}
	return ip.IsLoopback()
}
// Package seed contains the production-side seeding orchestrator plus
// shared types the YAML seed-file loader (gateway/seedfile) reuses
// when it applies declarative data to the engine.
//
// Both code paths ultimately call into the engine's CatalogClient
// over gRPC -- the same surface the REST handlers
// (gateway/handlers/datasets.go, tables.go, tabledata.go) drive --
// so seeded state is indistinguishable from state created via the
// public REST API.
package seed
import (
"context"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// Applier is the narrow surface seeding code drives to mutate the
// emulator's catalog and rows. It is implemented on top of an
// enginepb.CatalogClient by NewCatalogApplier; tests pass a fake
// implementation so the orchestrator and YAML loader can run without
// a live engine.
type Applier interface {
// EnsureDataset registers (project, dataset) with the engine.
// Implementations treat an ALREADY_EXISTS response as success
// so seeding is idempotent across reruns. The created result
// reports whether the call actually changed engine state, so
// callers can tally a "created vs skipped" counter in
// orchestrator metrics.
EnsureDataset(ctx context.Context, projectID, datasetID, location string) (created bool, err error)
// EnsureTable registers (project, dataset, table) with the
// given schema. Same idempotency contract as EnsureDataset:
// ALREADY_EXISTS surfaces as `created=false, err=nil`. The
// schema is taken at face value; callers that want to evolve
// schemas across runs are responsible for dropping and
// re-registering tables themselves.
EnsureTable(ctx context.Context, ref TableRef, schema *enginepb.TableSchema) (created bool, err error)
// InsertRows appends rows to (ref) in a single RPC. Schema is
// the table's column order so callers can pass a generic
// map-shaped row and the applier lays cells out positionally.
// Returns the number of rows inserted on success.
InsertRows(ctx context.Context, ref TableRef, schema *enginepb.TableSchema, rows []map[string]any) (int, error)
}
// TableRef is the (project, dataset, table) triple the applier API
// passes around. We keep it in this package -- rather than reusing
// enginepb.TableRef directly -- so callers don't need to import the
// generated proto package just to name a destination.
type TableRef struct {
// ProjectID is the project that owns the dataset.
ProjectID string
// DatasetID is the dataset that contains the table.
DatasetID string
// TableID is the table name within the dataset.
TableID string
}
// catalogApplier is the production Applier implementation backed by
// the gRPC CatalogClient. Its methods return an error when the
// receiver or the client is nil.
type catalogApplier struct {
// client is the engine catalog client every method forwards to.
client enginepb.CatalogClient
}
// NewCatalogApplier adapts a CatalogClient to the Applier interface. The
// applier keeps nothing but the client, so handing the same CatalogClient
// to several appliers is safe.
func NewCatalogApplier(c enginepb.CatalogClient) Applier {
	applier := new(catalogApplier)
	applier.client = c
	return applier
}
// EnsureDataset calls Catalog.RegisterDataset and makes it idempotent: an
// ALREADY_EXISTS response is reported as (false, nil) so seed reruns don't
// fail the entire batch. A successful registration reports (true, nil); any
// other failure is wrapped with the dataset name.
func (a *catalogApplier) EnsureDataset(ctx context.Context, projectID, datasetID, location string) (bool, error) {
	if a == nil || a.client == nil {
		return false, errors.New("seed: nil CatalogClient; engine subprocess required to ensure dataset")
	}
	req := &enginepb.RegisterDatasetRequest{
		Dataset: &enginepb.DatasetRef{
			ProjectId: projectID,
			DatasetId: datasetID,
		},
		Location: location,
	}
	_, err := a.client.RegisterDataset(ctx, req)
	switch {
	case err == nil:
		return true, nil
	case isAlreadyExists(err):
		return false, nil
	default:
		return false, fmt.Errorf("RegisterDataset %s.%s: %w", projectID, datasetID, err)
	}
}
// EnsureTable calls Catalog.RegisterTable with the same idempotency
// contract as EnsureDataset: ALREADY_EXISTS becomes (false, nil), success
// becomes (true, nil), and any other failure is wrapped with the table name.
func (a *catalogApplier) EnsureTable(ctx context.Context, ref TableRef, schema *enginepb.TableSchema) (bool, error) {
	if a == nil || a.client == nil {
		return false, errors.New("seed: nil CatalogClient; engine subprocess required to ensure table")
	}
	req := &enginepb.RegisterTableRequest{
		Table: &enginepb.TableRef{
			ProjectId: ref.ProjectID,
			DatasetId: ref.DatasetID,
			TableId:   ref.TableID,
		},
		Schema: schema,
	}
	_, err := a.client.RegisterTable(ctx, req)
	switch {
	case err == nil:
		return true, nil
	case isAlreadyExists(err):
		return false, nil
	default:
		return false, fmt.Errorf("RegisterTable %s.%s.%s: %w",
			ref.ProjectID, ref.DatasetID, ref.TableID, err)
	}
}
// InsertRows converts every map-shaped row into a positional DataRow
// (via rowToProto, which fills missing columns with NULL cells) and sends
// them to Catalog.InsertRows in one request. An empty rows slice returns
// (0, nil) without contacting the engine. On success the number of rows
// passed in is returned.
func (a *catalogApplier) InsertRows(
	ctx context.Context,
	ref TableRef,
	schema *enginepb.TableSchema,
	rows []map[string]any,
) (int, error) {
	if a == nil || a.client == nil {
		return 0, errors.New("seed: nil CatalogClient; engine subprocess required to insert rows")
	}
	count := len(rows)
	if count == 0 {
		return 0, nil
	}
	payload := make([]*enginepb.DataRow, count)
	for i, row := range rows {
		payload[i] = rowToProto(schema, row)
	}
	req := &enginepb.InsertRowsRequest{
		Table: &enginepb.TableRef{
			ProjectId: ref.ProjectID,
			DatasetId: ref.DatasetID,
			TableId:   ref.TableID,
		},
		Rows: payload,
	}
	if _, err := a.client.InsertRows(ctx, req); err != nil {
		return 0, fmt.Errorf("InsertRows %s.%s.%s (%d rows): %w",
			ref.ProjectID, ref.DatasetID, ref.TableID, count, err)
	}
	return count, nil
}
// rowToProto converts a map-shaped row into a DataRow with one cell per
// schema field, in schema order. A column missing from the map becomes a
// NULL cell; a present column is converted by cellFromJSONForField. Map
// keys that are not in the schema are ignored.
func rowToProto(schema *enginepb.TableSchema, row map[string]any) *enginepb.DataRow {
	fields := schema.GetFields()
	cells := make([]*enginepb.Cell, 0, len(fields))
	for _, field := range fields {
		if value, present := row[field.GetName()]; present {
			cells = append(cells, cellFromJSONForField(field, value))
		} else {
			cells = append(cells, nullCell())
		}
	}
	return &enginepb.DataRow{Cells: cells}
}
// cellFromJSONForField converts v into a Cell using the field schema f.
//
//   - f == nil: schema-blind conversion via ValueToCell.
//   - REPEATED field with a []any value: an Array whose elements are
//     converted against the element schema (f without its mode).
//   - STRUCT/RECORD field with a map value: a Struct whose cells follow the
//     order of f's sub-fields; a missing key becomes a NULL cell.
//   - anything else, including a value whose Go type does not match the
//     field shape: ValueToCell.
func cellFromJSONForField(f *enginepb.FieldSchema, v any) *enginepb.Cell {
	if f == nil {
		return ValueToCell(v)
	}
	switch {
	case isRepeatedFieldMode(f.GetMode()):
		items, isList := v.([]any)
		if !isList {
			break
		}
		element := repeatedElementSchema(f)
		cells := make([]*enginepb.Cell, 0, len(items))
		for _, item := range items {
			cells = append(cells, cellFromJSONForField(element, item))
		}
		return &enginepb.Cell{Value: &enginepb.Cell_Array{
			Array: &enginepb.Array{Elements: cells},
		}}
	case isStructFieldType(f.GetType()):
		object, isMap := v.(map[string]any)
		if !isMap {
			break
		}
		subFields := f.GetFields()
		cells := make([]*enginepb.Cell, 0, len(subFields))
		for _, sub := range subFields {
			if subValue, present := object[sub.GetName()]; present {
				cells = append(cells, cellFromJSONForField(sub, subValue))
			} else {
				cells = append(cells, nullCell())
			}
		}
		return &enginepb.Cell{Value: &enginepb.Cell_StructValue{
			StructValue: &enginepb.Struct{Fields: cells},
		}}
	}
	return ValueToCell(v)
}
// isRepeatedFieldMode reports whether mode, ignoring surrounding whitespace
// and letter case, is the REPEATED field mode.
func isRepeatedFieldMode(mode string) bool {
	normalized := strings.TrimSpace(mode)
	return strings.EqualFold(normalized, bqModeRepeated)
}
// repeatedElementSchema returns the schema of one element of a REPEATED
// field: a copy of f's name, type, description and sub-fields with the mode
// left unset. A nil field yields nil.
func repeatedElementSchema(f *enginepb.FieldSchema) *enginepb.FieldSchema {
	if f == nil {
		return nil
	}
	element := new(enginepb.FieldSchema)
	element.Name = f.GetName()
	element.Type = f.GetType()
	element.Description = f.GetDescription()
	element.Fields = f.GetFields()
	return element
}
// isStructFieldType reports whether t, ignoring surrounding whitespace and
// letter case, names the STRUCT or RECORD field type.
func isStructFieldType(t string) bool {
	normalized := strings.ToUpper(strings.TrimSpace(t))
	return normalized == bqTypeStruct || normalized == bqTypeRecord
}
// nullCell returns a new Cell whose value is the NULL marker.
func nullCell() *enginepb.Cell {
	null := &enginepb.Cell_NullValue{NullValue: true}
	return &enginepb.Cell{Value: null}
}
// ValueToCell converts a generic Go value into a proto Cell using the
// same conventions as gateway/handlers/tabledata.jsonToCell. Exported
// so the YAML loader and tests can reuse the conversion without
// reimplementing the (long, type-switch-heavy) logic.
//
// Conventions:
//   - nil -> Cell.null_value = true
//   - bool -> "true"/"false"
//   - json.Number -> decimal string verbatim
//   - float64 -> integer decimal string when the value is integral and
//     within the int64 range, otherwise shortest 'g' formatting
//   - int/int64 -> decimal string
//   - string -> string verbatim
//   - []byte -> base64-encoded string (BYTES wire shape)
//   - []any -> Array of converted cells
//   - map[string]any -> Struct (field order = map iteration order;
//     callers that need a deterministic order should pre-marshal to
//     a slice of {k, v} pairs and pass it through []any).
//   - anything else -> fmt "%v" rendering
//
// Do not use this helper for typed STRUCT or REPEATED columns: it
// ignores map keys and assigns values in Go map iteration order,
// which swaps named subfields (e.g. REPEATED STRUCT<key STRING,
// value JSON>). Always route STRUCT/REPEATED values through
// cellFromJSONForField with the table schema.
func ValueToCell(v any) *enginepb.Cell {
	if v == nil {
		return nullCell()
	}
	// text wraps a string in a string-valued Cell.
	text := func(s string) *enginepb.Cell {
		return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: s}}
	}
	switch val := v.(type) {
	case bool:
		return text(strconv.FormatBool(val))
	case json.Number:
		return text(string(val))
	case float64:
		// Converting a float outside the int64 range to int64 is
		// implementation-defined (some platforms saturate, which makes
		// 2^63 look integral and print as MaxInt64), so only take the
		// integer path when the value is provably in range. NaN fails
		// both comparisons and falls through to FormatFloat.
		const int64Bound = 1 << 63
		if val >= -int64Bound && val < int64Bound && val == float64(int64(val)) {
			return text(strconv.FormatInt(int64(val), 10))
		}
		return text(strconv.FormatFloat(val, 'g', -1, 64))
	case int:
		return text(strconv.Itoa(val))
	case int64:
		return text(strconv.FormatInt(val, 10))
	case string:
		return text(val)
	case []byte:
		return text(base64.StdEncoding.EncodeToString(val))
	case []any:
		arr := &enginepb.Array{Elements: make([]*enginepb.Cell, 0, len(val))}
		for _, el := range val {
			arr.Elements = append(arr.Elements, ValueToCell(el))
		}
		return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: arr}}
	case map[string]any:
		// Schema-blind: values only, keys discarded. See doc comment.
		st := &enginepb.Struct{Fields: make([]*enginepb.Cell, 0, len(val))}
		for _, fv := range val {
			st.Fields = append(st.Fields, ValueToCell(fv))
		}
		return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: st}}
	default:
		return text(fmt.Sprintf("%v", val))
	}
}
// Defaults captures the gateway-level fallback values seeding uses
// when callers (REST clients or YAML files) leave a project or
// dataset location empty. The gateway package builds one of these
// from its Options struct (see gateway/seed_runner.go) so the seed
// package itself never imports the gateway package and the two
// can stay free of import cycles.
type Defaults struct {
// ProjectID is the fallback project id.
ProjectID string
// DatasetLocation is the fallback dataset location.
DatasetLocation string
}
package seed
import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// isAlreadyExists reports whether err carries a gRPC status with code
// ALREADY_EXISTS. Every "ensure" path in the applier goes through this one
// decision point; if the engine ever signals duplicates differently (e.g.
// FAILED_PRECONDITION with a typed status detail) this is the only function
// to update. A nil error, or one without a gRPC status, reports false.
func isAlreadyExists(err error) bool {
	if err == nil {
		return false
	}
	if st, ok := status.FromError(err); ok {
		return st.Code() == codes.AlreadyExists
	}
	return false
}
package seed
import (
"context"
"encoding/json"
"errors"
"io"
"log" //nolint:depguard // matches the rest of the gateway package's existing log usage; slog migration is out of scope for this change
"net/http"
"strings"
)
// Status strings reused across the JSON error envelope responses.
// Pulled into named constants so the handlers don't repeat the same
// literals (and so a typo can't sneak past a grep).
const (
// statusInvalid accompanies 400 responses for unreadable, undecodable
// or invalid seed requests.
statusInvalid = "invalid"
// statusNotImplemented accompanies the 501 response sent when no
// Runner is configured.
statusNotImplemented = "notImplemented"
// statusNotFound accompanies the 404 response for an unknown
// operation id.
statusNotFound = "notFound"
)
// Runner is the interface the HTTP handler uses to dispatch a
// validated SeedRequest to whoever actually copies production data
// into the emulator. The production orchestrator implements this;
// tests inject a fake that returns canned SeedResults so the
// handler can be exercised without a network round-trip.
type Runner interface {
// Run executes one seed request. The handler calls it on a
// background goroutine with a context that is not tied to the
// originating HTTP request.
Run(ctx context.Context, req SeedRequest) (*SeedResult, error)
}
// HandlerDeps bundles everything the seed handler set needs at
// registration time. Kept as a struct so the call from gateway/
// server.go stays readable. The handlers are value-receiver methods,
// so they work on the copy captured by RegisterRoutes.
type HandlerDeps struct {
// Access enforces loopback / token gates. Per-process; the
// gateway constructs one from gateway.Options.
Access AccessConfig
// Store is the per-process operation registry. The handler
// creates one operation per accepted POST and looks up the
// right one on GET .../operations/{id}. RegisterRoutes
// substitutes a fresh Store when this is nil.
Store *Store
// Runner does the actual seeding work. Nil means the build
// does not include a production runner; the POST handler
// surfaces 501 NotImplemented with a documented reason so
// operators can tell "the route is wired" from "the build
// can't help me".
Runner Runner
}
// RegisterRoutes installs `POST /api/emulator/seed` and
// `GET /api/emulator/seed/operations/{operationId}` on mux. When
// deps.Store is nil a fresh Store is created for the handlers.
//
// Not idempotent: http.ServeMux panics when a pattern is registered
// twice, so this must be called at most once per mux. Callers wire it
// once from gateway.NewServer when EnableSeedAPI is true.
func RegisterRoutes(mux *http.ServeMux, deps HandlerDeps) {
if deps.Store == nil {
// deps is a local copy; the handlers bound below capture it.
deps.Store = NewStore()
}
mux.HandleFunc("POST /api/emulator/seed", deps.handlePost)
mux.HandleFunc("GET /api/emulator/seed/operations/{operationId}", deps.handleGet)
}
// handlePost serves POST /api/emulator/seed. In order, it:
//
//  1. applies the access gates (denial is written by writeAccessError);
//  2. reads, decodes and validates the SeedRequest, answering 400 with an
//     "invalid" envelope on any failure;
//  3. answers 501 when no Runner is configured;
//  4. otherwise mints an operation in the store, starts the seeding work on
//     a background goroutine and answers 202 with the operation envelope.
//
// Callers poll the GET endpoint for completion.
func (d HandlerDeps) handlePost(w http.ResponseWriter, r *http.Request) {
	if denied := d.Access.CheckAccess(r); denied != nil {
		writeAccessError(w, denied)
		return
	}
	// badRequest writes the shared 400 envelope with the given message.
	badRequest := func(message string) {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code:    http.StatusBadRequest,
			Status:  statusInvalid,
			Message: message,
		})
	}
	payload, readErr := io.ReadAll(r.Body)
	if readErr != nil {
		badRequest("Could not read seed request body: " + readErr.Error())
		return
	}
	req, decodeErr := DecodeRequest(payload)
	if decodeErr != nil {
		badRequest(decodeErr.Error())
		return
	}
	if invalid := req.Validate(); invalid != nil {
		badRequest(invalid.Error())
		return
	}
	if d.Runner == nil {
		// The route exists but the build does not include a
		// production runner. Surface a 501 with the documented
		// reason rather than a 200 with an empty Result.
		writeJSON(w, http.StatusNotImplemented, errEnvelope{
			Code:   http.StatusNotImplemented,
			Status: statusNotImplemented,
			Message: "Production seed is not compiled into this gateway build. " +
				"Use --seed-data-file for declarative seeding.",
		})
		return
	}
	op := d.Store.New(req)
	go d.runOperation(op.ID, req)
	writeJSON(w, http.StatusAccepted, operationToWire(op))
}
// handleGet returns the current snapshot of an operation. After the
// access check passes, a known id yields 200 with the operation
// envelope; an unknown id yields 404 with a "notFound" error
// envelope, so polling loops must handle 404 as "no such operation".
func (d HandlerDeps) handleGet(w http.ResponseWriter, r *http.Request) {
if err := d.Access.CheckAccess(r); err != nil {
writeAccessError(w, err)
return
}
id := r.PathValue("operationId")
op := d.Store.Get(id)
if op == nil {
// Store.Get signals an unknown id with nil.
writeJSON(w, http.StatusNotFound, errEnvelope{
Code: http.StatusNotFound,
Status: statusNotFound,
Message: "No such seed operation: " + id,
})
return
}
writeJSON(w, http.StatusOK, operationToWire(op))
}
// runOperation drives one operation in the background: it marks the
// operation running, invokes the Runner, then records either the result or
// the failure (which is also logged). Nothing happens when the store
// refuses to mark the operation running.
//
// The runner gets a fresh context.Background so the HTTP request that
// posted the operation can complete (its context goes away) without
// cancelling the seed work; long operations are the norm.
func (d HandlerDeps) runOperation(id string, req SeedRequest) {
	if started := d.Store.MarkRunning(id); !started {
		return
	}
	result, runErr := d.Runner.Run(context.Background(), req)
	if runErr == nil {
		d.Store.MarkResult(id, result)
		return
	}
	log.Printf("seed: operation %s failed: %v", id, runErr)
	d.Store.MarkFailed(id, runErr.Error())
}
// operationWire is the JSON envelope the seed endpoints serve for an
// operation; operationToWire builds it from the in-memory Operation.
// Kept as a separate type because the in-memory Operation carries
// fields (mutex receivers, Go-only timestamps) that don't belong on
// the wire.
type operationWire struct {
ID string `json:"id"`
State OperationState `json:"state"`
// Started and Finished are UTC timestamps formatted as
// 2006-01-02T15:04:05Z; Finished is omitted until it is set.
Started string `json:"started"`
Finished string `json:"finished,omitempty"`
Request SeedRequest `json:"request"`
Result *SeedResult `json:"result,omitempty"`
Error string `json:"error,omitempty"`
Cancelled bool `json:"cancelled,omitempty"`
}
// operationToWire flattens op into its JSON envelope. Timestamps are
// rendered in UTC with second precision; Finished stays empty (and is
// therefore omitted from the JSON) while op.Finished is the zero time.
func operationToWire(op *Operation) operationWire {
	const layout = "2006-01-02T15:04:05Z"
	finished := ""
	if !op.Finished.IsZero() {
		finished = op.Finished.UTC().Format(layout)
	}
	return operationWire{
		ID:        op.ID,
		State:     op.State,
		Started:   op.Started.UTC().Format(layout),
		Finished:  finished,
		Request:   op.Request,
		Result:    op.Result,
		Error:     op.FatalErr,
		Cancelled: op.Cancelled,
	}
}
// errEnvelope mirrors the BigQuery-shaped error response the rest
// of the gateway uses (gateway/handlers/handlers.go). Duplicating
// the shape here keeps the seed package from importing the public
// handlers package and creating an import cycle.
type errEnvelope struct {
// Code repeats the HTTP status code of the response.
Code int `json:"code"`
// Status is the short machine-readable reason (e.g. "invalid").
Status string `json:"status"`
// Message is the human-readable explanation.
Message string `json:"message"`
}
// writeJSON sends body as a JSON response with the given HTTP status and a
// UTF-8 JSON content type.
func writeJSON(w http.ResponseWriter, status int, body any) {
	headers := w.Header()
	headers.Set("Content-Type", "application/json; charset=utf-8")
	w.WriteHeader(status)
	encoder := json.NewEncoder(w)
	// Best effort: the status line is already written, so an encoding
	// failure cannot be reported to the client any more.
	_ = encoder.Encode(body)
}
// writeAccessError writes an access denial using the gateway's error
// envelope with status "accessDenied". The HTTP status is taken from err
// when it is (or wraps) an httpError and defaults to 403 otherwise, which
// keeps the deny path uniform across loopback and token failures.
func writeAccessError(w http.ResponseWriter, err error) {
	status := http.StatusForbidden
	var typed httpError
	if errors.As(err, &typed) {
		status = typed.Status()
	}
	writeJSON(w, status, errEnvelope{
		Code:    status,
		Status:  "accessDenied",
		Message: strings.TrimSpace(err.Error()),
	})
}
package seed
import (
"context"
"errors"
"fmt"
"strings"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// ProductionReader is the narrow surface the orchestrator drives
// against the live production BigQuery side. The concrete
// implementation (gateway/seed/production_live.go, gated behind the
// `seed_production_live` build tag) wraps cloud.google.com/go/bigquery
// so the orchestrator package itself never pulls in the heavy cloud
// dependency tree unless the operator explicitly opts in.
//
// Tests stub this interface to feed deterministic metadata and rows
// without a network round-trip.
type ProductionReader interface {
// ListDatasets enumerates the datasets in `projectID` that
// are visible to the calling principal. Implementations may
// stream results internally (cloud.google.com/go/bigquery's
// iterator does) but the gateway-side API stays slice-shaped
// so the test fakes are easy to write.
ListDatasets(ctx context.Context, projectID string) ([]ProductionDataset, error)
// ListTables enumerates tables, views, and external tables
// inside `projectID.datasetID`. Each entry carries enough
// metadata for the orchestrator to decide whether the table
// is supported (physical) or needs a SeedResourceError
// (view, model, ...).
ListTables(ctx context.Context, projectID, datasetID string) ([]ProductionTable, error)
// DescribeTable returns the physical schema for one table.
// Only called for entries ListTables reported as
// supportable (physical tables, snapshots).
DescribeTable(ctx context.Context, projectID, datasetID, tableID string) (*enginepb.TableSchema, error)
// ReadRows pages through one table's rows and returns them as
// column-name -> value maps. The orchestrator uses maxRows
// (the request's MaxRowsPerTable knob) to cap the read; an
// implementation may stop early.
ReadRows(ctx context.Context, projectID, datasetID, tableID string, maxRows int64) ([]map[string]any, error)
}
// ProductionDataset is the slimmed-down view of a BigQuery dataset
// the orchestrator needs. The fields map 1:1 to the dataset
// resource in cloud.google.com/go/bigquery so the live adapter is
// a thin lift.
type ProductionDataset struct {
// ProjectID is the project that owns the dataset.
ProjectID string
// DatasetID is the id seedProject and seedDataset match on.
DatasetID string
// Location is copied onto the destination dataset by
// seedDataset; when empty the orchestrator falls back to
// Defaults.DatasetLocation.
Location string
}
// ProductionTable carries the fields the orchestrator reads off the
// cloud client's TableMetadata.
type ProductionTable struct {
// ProjectID and DatasetID locate the table's parent dataset.
ProjectID string
DatasetID string
// TableID is the id resolveSourceTable matches against the
// requested source table.
TableID string
// Type is the cloud library's table-type string ("TABLE",
// "VIEW", "MATERIALIZED_VIEW", "EXTERNAL", "MODEL", ...).
// resolveSourceTable compares it case-insensitively against
// "TABLE" and also accepts an empty Type as a physical table;
// anything else lands in ResourceErrors with
// kind=unsupported until the engine learns to persist them.
Type string
}
// Orchestrator wires a ProductionReader to an Applier and runs one
// SeedRequest through them. It is the production-side
// implementation of Runner.
type Orchestrator struct {
// Reader is the production-side source of dataset/table
// metadata and rows.
Reader ProductionReader
// Applier is the emulator-side sink that creates datasets and
// tables and inserts rows.
Applier Applier
// Defaults supplies the gateway-level fallbacks: ProjectID
// feeds the billing-project fallback chain and
// DatasetLocation is used when the source dataset's location
// is unknown.
Defaults Defaults
// EnvLookup is consulted when ResolveBillingProject walks
// the env fallback chain. NewOrchestrator wires it to
// LookupEnvOrEmpty (os.LookupEnv). When it is nil -- e.g. on
// an Orchestrator built as a struct literal -- envLookup
// substitutes a lookup that reports every variable as unset.
EnvLookup func(string) (string, bool)
}
// NewOrchestrator builds an Orchestrator around reader and applier,
// with EnvLookup preset to LookupEnvOrEmpty.
//
// Both reader and applier must be non-nil. The constructor panics on
// a nil argument instead of returning an error, because a nil
// dependency would otherwise surface as a confusing
// "method on nil receiver" failure deep inside Run.
func NewOrchestrator(reader ProductionReader, applier Applier, defaults Defaults) *Orchestrator {
	switch {
	case reader == nil:
		panic("seed: NewOrchestrator: reader must be non-nil; pass NewProductionReader or a test stub")
	case applier == nil:
		panic("seed: NewOrchestrator: applier must be non-nil")
	}

	orch := new(Orchestrator)
	orch.Reader = reader
	orch.Applier = applier
	orch.Defaults = defaults
	orch.EnvLookup = LookupEnvOrEmpty
	return orch
}
// Run executes one SeedRequest. The request is re-validated here
// even if the HTTP layer already did so, so a malformed request from
// a non-HTTP caller surfaces as a validation error rather than a
// panic.
//
// The scope is picked from the source reference: a table when
// Source.Table is set, otherwise a dataset when Source.Dataset is
// set, otherwise the whole project.
//
// On success the returned SeedResult is non-nil and carries
// Started/Finished timestamps. Failures from the reader or applier
// do not abort the run; the seed helpers accumulate them in
// Result.ResourceErrors. The only error Run itself returns is the
// one produced by req.Validate.
func (o *Orchestrator) Run(ctx context.Context, req SeedRequest) (*SeedResult, error) {
	if invalid := req.Validate(); invalid != nil {
		return nil, invalid
	}

	res := &SeedResult{Started: nowRFC3339()}

	// The billing project is consumed by the live reader, not here;
	// resolving it anyway exercises the fallback chain even when
	// tests bypass the live adapter. The value is discarded.
	_ = ResolveBillingProject(req, o.Defaults.ProjectID, o.envLookup())

	src := req.Source
	dst := destinationOf(req)
	if src.Table != "" {
		o.seedTable(ctx, src.Project, src.Dataset, src.Table,
			dst.Project, dst.Dataset, dst.Table,
			req.MaxRowsPerTable, res)
	} else if src.Dataset != "" {
		o.seedDataset(ctx, src.Project, src.Dataset,
			dst.Project, dst.Dataset, req.MaxRowsPerTable, res)
	} else {
		o.seedProject(ctx, src.Project, dst.Project, req.MaxRowsPerTable, res)
	}

	res.Finished = nowRFC3339()
	return res, nil
}
// envLookup returns the configured EnvLookup, or -- when none is
// set -- a stand-in that reports every variable as unset. Keeping
// the nil check here means Run never has to branch on it.
func (o *Orchestrator) envLookup() func(string) (string, bool) {
	if o.EnvLookup == nil {
		return func(string) (string, bool) { return "", false }
	}
	return o.EnvLookup
}
// seedProject copies every dataset under sourceProject into
// destProject, keeping each dataset's id unchanged. If listing the
// source datasets fails, a single "rpc" resource error is recorded
// against the project and nothing is seeded; failures inside
// individual datasets are recorded by seedDataset and do not stop
// the loop.
func (o *Orchestrator) seedProject(
	ctx context.Context,
	sourceProject, destProject string,
	maxRows int64,
	result *SeedResult,
) {
	found, listErr := o.Reader.ListDatasets(ctx, sourceProject)
	if listErr != nil {
		failure := SeedResourceError{
			Resource: "project:" + sourceProject,
			Kind:     resourceKindRPC,
			Error:    fmt.Sprintf("ListDatasets: %v", listErr),
		}
		result.ResourceErrors = append(result.ResourceErrors, failure)
		return
	}

	for i := range found {
		id := found[i].DatasetID
		o.seedDataset(ctx, sourceProject, id, destProject, id, maxRows, result)
	}
}
// seedDataset copies one source dataset into destProject under the
// name destDataset (an empty destDataset mirrors the source name).
//
// Steps, in order:
//  1. Look up the source dataset's location via ListDatasets. This
//     lookup is best-effort: its error is ignored and an unknown or
//     empty location falls back to Defaults.DatasetLocation.
//  2. Ensure the destination dataset exists, bumping
//     DatasetsCreated or DatasetsSkipped.
//  3. List the source tables and seed each one under its own id.
//
// A failure in step 2 or 3 is appended to result.ResourceErrors and
// ends the dataset's processing.
func (o *Orchestrator) seedDataset(
	ctx context.Context,
	sourceProject, sourceDataset, destProject, destDataset string,
	maxRows int64,
	result *SeedResult,
) {
	if destDataset == "" {
		destDataset = sourceDataset
	}

	var location string
	known, _ := o.Reader.ListDatasets(ctx, sourceProject)
	for i := range known {
		if known[i].DatasetID != sourceDataset {
			continue
		}
		location = known[i].Location
		break
	}
	if location == "" {
		location = o.Defaults.DatasetLocation
	}

	created, ensureErr := o.Applier.EnsureDataset(ctx, destProject, destDataset, location)
	if ensureErr != nil {
		failure := SeedResourceError{
			Resource: fmt.Sprintf("dataset:%s.%s", destProject, destDataset),
			Kind:     resourceKindWrite,
			Error:    ensureErr.Error(),
		}
		result.ResourceErrors = append(result.ResourceErrors, failure)
		return
	}
	if !created {
		result.DatasetsSkipped++
	} else {
		result.DatasetsCreated++
	}

	listed, listErr := o.Reader.ListTables(ctx, sourceProject, sourceDataset)
	if listErr != nil {
		failure := SeedResourceError{
			Resource: fmt.Sprintf("dataset:%s.%s", sourceProject, sourceDataset),
			Kind:     resourceKindRPC,
			Error:    fmt.Sprintf("ListTables: %v", listErr),
		}
		result.ResourceErrors = append(result.ResourceErrors, failure)
		return
	}

	for i := range listed {
		name := listed[i].TableID
		o.seedTable(ctx, sourceProject, sourceDataset, name,
			destProject, destDataset, name, maxRows, result)
	}
}
// seedTable copies a single source table into the destination (an
// empty destTable mirrors the source name).
//
// The work is a pipeline of four helpers: resolveSourceTable,
// describeSourceSchema, writeTableMetadata and copyTableRows. Each
// helper appends to result.ResourceErrors when it fails, and
// seedTable stops at the first failing step, so unsupported source
// types (views, materialized views, models, external tables) are
// reported without aborting the surrounding seed.
func (o *Orchestrator) seedTable(
	ctx context.Context,
	sourceProject, sourceDataset, sourceTable, destProject, destDataset, destTable string,
	maxRows int64,
	result *SeedResult,
) {
	if destTable == "" {
		destTable = sourceTable
	}

	// Only the verdict matters here; the matched entry itself is
	// not needed once it has been validated as a physical table.
	if _, supported := o.resolveSourceTable(ctx, sourceProject, sourceDataset, sourceTable, result); !supported {
		return
	}

	schema, described := o.describeSourceSchema(ctx, sourceProject, sourceDataset, sourceTable, result)
	if !described {
		return
	}

	dest := TableRef{ProjectID: destProject, DatasetID: destDataset, TableID: destTable}
	if o.writeTableMetadata(ctx, dest, schema, result) {
		o.copyTableRows(ctx, sourceProject, sourceDataset, sourceTable, dest, schema, maxRows, result)
	}
}
// resolveSourceTable finds sourceTable in the ListTables output for
// the source dataset and returns it when it is a supported entry:
// one whose Type is "TABLE" (case-insensitive) or empty.
//
// On any failure it appends a resource error and returns
// (nil, false): kind "rpc" when ListTables errors, kind "read" when
// the table is not listed, kind "unsupported" for any other type.
func (o *Orchestrator) resolveSourceTable(
	ctx context.Context,
	sourceProject, sourceDataset, sourceTable string,
	result *SeedResult,
) (*ProductionTable, bool) {
	resource := fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, sourceTable)
	fail := func(kind, message string) (*ProductionTable, bool) {
		result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
			Resource: resource,
			Kind:     kind,
			Error:    message,
		})
		return nil, false
	}

	listed, listErr := o.Reader.ListTables(ctx, sourceProject, sourceDataset)
	if listErr != nil {
		return fail(resourceKindRPC, fmt.Sprintf("ListTables: %v", listErr))
	}

	var hit *ProductionTable
	for i := range listed {
		if listed[i].TableID != sourceTable {
			continue
		}
		hit = &listed[i]
		break
	}
	if hit == nil {
		return fail(resourceKindRead, "table not found")
	}

	if hit.Type == "" || strings.EqualFold(hit.Type, "TABLE") {
		return hit, true
	}
	return fail(resourceKindUnsupported, fmt.Sprintf(
		"type %q is not yet supported by the BigQuery emulator; only physical TABLE entries are seeded",
		hit.Type,
	))
}
// describeSourceSchema fetches the source table's schema through
// Reader.DescribeTable. When the call fails it records a "read"
// resource error against the source table and returns (nil, false).
func (o *Orchestrator) describeSourceSchema(
	ctx context.Context,
	sourceProject, sourceDataset, sourceTable string,
	result *SeedResult,
) (*enginepb.TableSchema, bool) {
	schema, descErr := o.Reader.DescribeTable(ctx, sourceProject, sourceDataset, sourceTable)
	if descErr == nil {
		return schema, true
	}

	failure := SeedResourceError{
		Resource: fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, sourceTable),
		Kind:     resourceKindRead,
		Error:    fmt.Sprintf("DescribeTable: %v", descErr),
	}
	result.ResourceErrors = append(result.ResourceErrors, failure)
	return nil, false
}
// writeTableMetadata ensures the destination table ref exists with
// the given schema and bumps TablesCreated or TablesSkipped
// depending on what the applier reports. It returns false -- after
// recording a "write" resource error -- when EnsureTable fails.
func (o *Orchestrator) writeTableMetadata(
	ctx context.Context,
	ref TableRef,
	schema *enginepb.TableSchema,
	result *SeedResult,
) bool {
	created, ensureErr := o.Applier.EnsureTable(ctx, ref, schema)
	if ensureErr != nil {
		failure := SeedResourceError{
			Resource: fmt.Sprintf("table:%s.%s.%s", ref.ProjectID, ref.DatasetID, ref.TableID),
			Kind:     resourceKindWrite,
			Error:    ensureErr.Error(),
		}
		result.ResourceErrors = append(result.ResourceErrors, failure)
		return false
	}

	if !created {
		result.TablesSkipped++
		return true
	}
	result.TablesCreated++
	return true
}
// copyTableRows reads rows from the source table (passing maxRows
// through to the reader as the cap) and inserts them into ref. A
// read that yields no rows skips the InsertRows call entirely.
//
// A ReadRows failure is recorded as a "read" error against the
// source table; an InsertRows failure is recorded as a "write"
// error against the destination. On success the inserted count is
// added to result.RowsCopied.
func (o *Orchestrator) copyTableRows(
	ctx context.Context,
	sourceProject, sourceDataset, sourceTable string,
	ref TableRef,
	schema *enginepb.TableSchema,
	maxRows int64,
	result *SeedResult,
) {
	batch, readErr := o.Reader.ReadRows(ctx, sourceProject, sourceDataset, sourceTable, maxRows)
	if readErr != nil {
		failure := SeedResourceError{
			Resource: fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, sourceTable),
			Kind:     resourceKindRead,
			Error:    fmt.Sprintf("ReadRows: %v", readErr),
		}
		result.ResourceErrors = append(result.ResourceErrors, failure)
		return
	}
	if len(batch) == 0 {
		return
	}

	inserted, writeErr := o.Applier.InsertRows(ctx, ref, schema, batch)
	if writeErr != nil {
		failure := SeedResourceError{
			Resource: fmt.Sprintf("table:%s.%s.%s", ref.ProjectID, ref.DatasetID, ref.TableID),
			Kind:     resourceKindWrite,
			Error:    writeErr.Error(),
		}
		result.ResourceErrors = append(result.ResourceErrors, failure)
		return
	}
	result.RowsCopied += int64(inserted)
}
// Resource error classification strings, written into
// SeedResourceError.Kind. Pulled into named constants so the
// orchestrator and tests both reference one source of truth, and so
// a future rename of a kind value updates one place.
const (
// resourceKindRPC marks a failed list call (ListDatasets,
// ListTables).
resourceKindRPC = "rpc"
// resourceKindRead marks a source-side failure on one table:
// table not found, DescribeTable error, or ReadRows error.
resourceKindRead = "read"
// resourceKindWrite marks a destination-side failure from the
// applier (EnsureDataset, EnsureTable, InsertRows).
resourceKindWrite = "write"
// resourceKindUnsupported marks a source entry whose type is
// not a physical table.
resourceKindUnsupported = "unsupported"
)
// destinationOf resolves the (project, dataset, table) triple the
// seed helpers write to. It starts from the source reference and
// replaces each field whose Destination override is non-empty after
// trimming whitespace. A nil Destination mirrors the source as-is.
func destinationOf(req SeedRequest) struct{ Project, Dataset, Table string } {
	src := req.Source
	if req.Destination == nil {
		return struct{ Project, Dataset, Table string }{
			Project: src.Project,
			Dataset: src.Dataset,
			Table:   src.Table,
		}
	}

	// pick prefers the trimmed override and falls back to the
	// (untrimmed) source value when the override is blank.
	pick := func(override, fallback string) string {
		if trimmed := strings.TrimSpace(override); trimmed != "" {
			return trimmed
		}
		return fallback
	}

	dst := req.Destination
	return struct{ Project, Dataset, Table string }{
		Project: pick(dst.Project, src.Project),
		Dataset: pick(dst.Dataset, src.Dataset),
		Table:   pick(dst.Table, src.Table),
	}
}
// NewProductionReader is the constructor the gateway calls when the
// operator enables the seed API. The default build always fails
// with an error that wraps ErrProductionUnsupported; building with
// `-tags=seed_production_live` swaps in the
// cloud.google.com/go/bigquery-backed implementation
// (gateway/seed/production_live.go).
//
// Callers that just want the YAML loader (gateway/seedfile) don't
// need to call this and don't pay for the heavy cloud deps.
//
// The parameters are unused here on purpose: the signature matches
// what the live implementation needs (the billing project to set
// the ADC quota project, and the env lookup to derive fallbacks for
// the production client).
//
// The returned error satisfies
// errors.Is(err, ErrProductionUnsupported), so callers can detect
// the "not compiled in" case without matching on the message.
func NewProductionReader(
	ctx context.Context,
	billingProject string,
	getenv func(string) (string, bool),
) (ProductionReader, error) {
	// Wrap the sentinel itself instead of minting a fresh error
	// from its text: a new errors.New value has the same message
	// but a different identity, which makes errors.Is fail. Join
	// with a single argument keeps the message unchanged.
	return nil, errors.Join(ErrProductionUnsupported)
}
package seed
import (
"crypto/rand"
"encoding/hex"
"sync"
"time"
)
// OperationState enumerates the lifecycle states a seed operation
// moves through.
//
// Pending -> Running -> Done | Failed
//
// Store.MarkRunning enforces the Pending -> Running edge. Note that
// Store.MarkResult and Store.MarkFailed do not check the previous
// state before moving an operation to Done or Failed.
type OperationState string
const (
// OperationPending is the state Store.New assigns.
OperationPending OperationState = "PENDING"
// OperationRunning is set by Store.MarkRunning.
OperationRunning OperationState = "RUNNING"
// OperationDone is set by Store.MarkResult.
OperationDone OperationState = "DONE"
// OperationFailed is set by Store.MarkFailed.
OperationFailed OperationState = "FAILED"
)
// Operation is the persisted view of an in-flight or completed seed
// operation. The HTTP handler converts this into the wire-shape
// the polling endpoint serves; keeping it as a Go struct (rather
// than a raw map) makes the store's tests easier to read.
//
// Started/Finished use time.Time even though only Finished can be
// zero-valued (Started is always stamped on New). The wire-shape
// formatting -- including omitting an empty Finished -- happens in
// operationToWire over in handler.go, so this struct itself is
// purely the in-memory view and the json tags are minimal.
type Operation struct {
// ID is the opaque identifier minted by the store's idGen.
ID string `json:"id"`
// State is the current lifecycle state.
State OperationState `json:"state"`
// Started is stamped (UTC) by Store.New.
Started time.Time `json:"started"`
// Finished is stamped (UTC) by Store.MarkResult or
// Store.MarkFailed; it is the zero time until then.
Finished time.Time `json:"finished"`
// Request is the SeedRequest the operation was created for.
Request SeedRequest `json:"request"`
// Result is the pointer handed to Store.MarkResult; nil until
// then.
Result *SeedResult `json:"result,omitempty"`
// FatalErr is the message handed to Store.MarkFailed.
FatalErr string `json:"error,omitempty"`
// Cancelled is not written by any Store method in this file.
// NOTE(review): presumably set by a cancellation path
// elsewhere -- confirm.
Cancelled bool `json:"cancelled,omitempty"`
}
// Store holds the per-process operation registry. The HTTP handler
// stores newly-minted operations here and reads them back on poll;
// the orchestrator drives state transitions via Mark*. The store is
// in-memory; restarting the gateway forgets every operation, which
// is consistent with the rest of the emulator's lifecycle.
//
// Every method takes mu for its whole body, and New/Get hand out
// copies (see cloneOperation) rather than the stored pointer.
type Store struct {
// mu guards ops.
mu sync.Mutex
// ops maps operation ID to the stored operation.
ops map[string]*Operation
// idGen mints opaque operation IDs. Pulled out into a func
// so tests can pin "operationN" instead of random hex.
idGen func() string
}
// NewStore returns an empty operation registry that mints IDs with
// newRandomID. Each gateway process owns one; the seed handler
// closes over it.
func NewStore() *Store {
	store := new(Store)
	store.ops = map[string]*Operation{}
	store.idGen = newRandomID
	return store
}
// New registers a fresh operation for req in PENDING state, stamped
// with the current UTC time, and returns a copy of it. Callers
// transition it to RUNNING via MarkRunning once the orchestrator
// picks it up.
func (s *Store) New(req SeedRequest) *Operation {
	s.mu.Lock()
	defer s.mu.Unlock()

	id := s.idGen()
	stored := &Operation{
		ID:      id,
		State:   OperationPending,
		Started: time.Now().UTC(),
		Request: req,
	}
	s.ops[id] = stored
	return cloneOperation(stored)
}
// Get returns a copy of the operation registered under id, or nil
// when the store has no such operation.
func (s *Store) Get(id string) *Operation {
	s.mu.Lock()
	defer s.mu.Unlock()

	if stored, found := s.ops[id]; found {
		return cloneOperation(stored)
	}
	return nil
}
// MarkRunning moves the operation from PENDING to RUNNING and
// reports whether it did so. It returns false, changing nothing,
// when the operation doesn't exist or has already left PENDING.
func (s *Store) MarkRunning(id string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	stored, exists := s.ops[id]
	pending := exists && stored.State == OperationPending
	if pending {
		stored.State = OperationRunning
	}
	return pending
}
// MarkResult records a successful (or partially-successful)
// completion: the operation becomes DONE, keeps the given result
// pointer, and has its Finished timestamp stamped with the current
// UTC time. It returns false when no operation has the given id.
// The operation's previous state is not checked.
func (s *Store) MarkResult(id string, result *SeedResult) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	stored, exists := s.ops[id]
	if exists {
		stored.State = OperationDone
		stored.Result = result
		stored.Finished = time.Now().UTC()
	}
	return exists
}
// MarkFailed records a catastrophic failure (unreadable production,
// missing creds, ...): the operation becomes FAILED, stores errMsg
// as its fatal error, and has Finished stamped with the current UTC
// time. It returns false when no operation has the given id.
//
// Per-resource failures belong in Result.ResourceErrors and should
// be reported via MarkResult instead.
func (s *Store) MarkFailed(id, errMsg string) bool {
	s.mu.Lock()
	defer s.mu.Unlock()

	stored, exists := s.ops[id]
	if exists {
		stored.State = OperationFailed
		stored.FatalErr = errMsg
		stored.Finished = time.Now().UTC()
	}
	return exists
}
// cloneOperation returns a shallow copy of op so the caller can
// mutate the copy's own fields without racing goroutines that read
// the stored original. The Result pointer is shared between copy
// and original; that is acceptable because a Result is never
// mutated after it has been passed through MarkResult.
func cloneOperation(op *Operation) *Operation {
	dup := new(Operation)
	*dup = *op
	return dup
}
// newRandomID mints an operation ID of the form "op-" followed by
// 16 lowercase hex digits (8 random bytes). 64 bits of entropy is
// plenty -- the seed store is in-memory and never federated, so
// collision risk is bounded by the lifetime of one gateway process.
//
// If the random source reports an error the fixed id
// "op-rand-error" is returned instead of panicking.
func newRandomID() string {
	raw := make([]byte, 8)
	_, err := rand.Read(raw)
	if err != nil {
		// crypto/rand is documented to never fail in practice
		// on modern OSes; if it does, fall back to a clearly
		// debug-able id rather than panic.
		return "op-rand-error"
	}
	return "op-" + hex.EncodeToString(raw)
}
package seed
import (
"encoding/json"
"errors"
"fmt"
"os"
"strings"
"time"
)
// SeedRequest is the JSON body the seed API accepts on
// `POST /api/emulator/seed`. The contract is documented in
// docs/SEEDING.md so operators with existing seed tooling can
// reuse request bodies without changes.
type SeedRequest struct {
// Source is the production-side resource the seeder reads
// from. Required: Validate rejects a blank Source.Project.
Source SeedEndpointRef `json:"source"`
// Destination is the emulator-side resource the seeder writes
// into. When omitted, the seeder mirrors Source 1:1 (same
// project/dataset/table ids on this emulator).
Destination *SeedDestinationRef `json:"destination,omitempty"`
// MaxRowsPerTable bounds the number of rows the seeder will
// copy from any single source table. Zero means "no limit";
// negative values are rejected by Validate. This is the
// dominant safety knob -- operators trying to mirror
// billion-row tables into a local emulator should set this
// aggressively.
MaxRowsPerTable int64 `json:"maxRowsPerTable,omitempty"`
// BillingProject is the GCP project the BigQuery jobs the
// production read issues are billed against. When omitted
// the seeder falls back through the documented chain (see
// ResolveBillingProject).
BillingProject string `json:"billingProject,omitempty"`
}
// SeedEndpointRef names a production resource. Either Project (full
// project scope), Project+Dataset (dataset scope), or
// Project+Dataset+Table (single-table scope) is supported. Source
// requests with no Project are rejected up front -- BigQuery has no
// well-defined notion of "default project" on the wire.
type SeedEndpointRef struct {
// Project is required; Validate rejects a blank value.
Project string `json:"project"`
// Dataset narrows the scope to one dataset when set.
Dataset string `json:"dataset,omitempty"`
// Table narrows the scope to one table; Validate requires
// Dataset to be set alongside it.
Table string `json:"table,omitempty"`
}
// SeedDestinationRef is the same shape as SeedEndpointRef but every
// field is an optional remapping. When a field is omitted the
// seeder mirrors the corresponding source name verbatim; when set
// it copies `Source.Project.Source.Dataset.Source.Table` into
// `Destination.Project.Destination.Dataset.Destination.Table`.
//
// Validate enforces that a remapping only targets a level the
// source also names: Dataset requires source.dataset, Table
// requires source.table, and Table requires Dataset.
type SeedDestinationRef struct {
// Project overrides the destination project.
Project string `json:"project,omitempty"`
// Dataset overrides the destination dataset id.
Dataset string `json:"dataset,omitempty"`
// Table overrides the destination table id.
Table string `json:"table,omitempty"`
}
// SeedResult is what we report back to the caller once an operation
// finishes. Counters follow the stable seed API shape documented in
// docs/SEEDING.md so dashboards and scripts can read responses
// without changes.
type SeedResult struct {
// Started / Finished are RFC 3339 timestamps in UTC (see
// nowRFC3339). Finished is omitted from the JSON while empty.
Started string `json:"started"`
Finished string `json:"finished,omitempty"`
// DatasetsCreated counts datasets the seeder added to the
// emulator on this run. Idempotent reruns surface 0 here and
// a positive DatasetsSkipped.
DatasetsCreated int `json:"datasetsCreated"`
// DatasetsSkipped counts datasets that already existed.
DatasetsSkipped int `json:"datasetsSkipped"`
// TablesCreated / TablesSkipped are the same shape for
// physical-table resources. Views, materialized views,
// external tables, and routines all fold into
// ResourceErrors (one entry per unsupported resource) for
// the initial integration; see ROADMAP for the support
// matrix.
TablesCreated int `json:"tablesCreated"`
TablesSkipped int `json:"tablesSkipped"`
// RowsCopied is the running total of rows the seeder
// successfully inserted into the emulator across every
// destination table this operation touched.
RowsCopied int64 `json:"rowsCopied"`
// ResourceErrors holds per-resource failures. The operation
// itself can still finish "DONE" while individual tables
// failed -- partial-failure data is returned without forcing
// the caller to retry the entire scope.
ResourceErrors []SeedResourceError `json:"resourceErrors,omitempty"`
}
// SeedResourceError captures a per-resource failure (one table, one
// view, one routine, ...). The presence of any non-empty Error in
// ResourceErrors is what the operation polling endpoint surfaces in
// the public Operation.error field; the operation as a whole only
// fails when something catastrophic happens (the production project
// is unreachable, ADC credentials are missing, etc).
type SeedResourceError struct {
// Resource is a human-readable identifier
// ("project:proj", "dataset:proj.ds", "table:proj.ds.tbl",
// "view:proj.ds.v", ...).
Resource string `json:"resource"`
// Kind classifies why the failure happened
// ("unsupported", "rpc", "read", "write", "skipped"). The
// orchestrator's resourceKind* constants cover the first
// four values.
Kind string `json:"kind"`
// Error is the human-readable failure message.
Error string `json:"error"`
}
// Validate runs the cheap input checks the orchestrator depends on
// before it touches the network. Every failure wraps
// ErrInvalidRequest with a human-readable reason so the HTTP handler
// can answer with a 400 and the right message.
//
// Rules, checked in this order:
//   - the receiver is non-nil and source.project is not blank;
//   - source.table requires source.dataset;
//   - destination.table requires destination.dataset;
//   - destination.dataset requires source.dataset;
//   - destination.table requires source.table;
//   - maxRowsPerTable is not negative.
func (r *SeedRequest) Validate() error {
	if r == nil {
		return fmt.Errorf("%w: nil request body", ErrInvalidRequest)
	}

	src := r.Source
	switch {
	case strings.TrimSpace(src.Project) == "":
		return fmt.Errorf("%w: source.project is required", ErrInvalidRequest)
	case src.Table != "" && src.Dataset == "":
		return fmt.Errorf("%w: source.table requires source.dataset", ErrInvalidRequest)
	}

	if dst := r.Destination; dst != nil {
		switch {
		case dst.Table != "" && dst.Dataset == "":
			return fmt.Errorf("%w: destination.table requires destination.dataset", ErrInvalidRequest)
		case src.Dataset == "" && dst.Dataset != "":
			return fmt.Errorf(
				"%w: destination.dataset requires source.dataset (cannot remap a project-scope seed to a single dataset)",
				ErrInvalidRequest,
			)
		case src.Table == "" && dst.Table != "":
			return fmt.Errorf("%w: destination.table requires source.table", ErrInvalidRequest)
		}
	}

	if r.MaxRowsPerTable < 0 {
		return fmt.Errorf("%w: maxRowsPerTable must be >= 0", ErrInvalidRequest)
	}
	return nil
}
// Env var names walked by ResolveBillingProject's fallback chain.
// Exported as package-level constants so tests and callers reference
// the same strings the implementation looks up. The lookup order is
// defined by billingEnvChain, not by declaration order here.
const (
// EnvGoogleCloudQuotaProject is consulted first.
EnvGoogleCloudQuotaProject = "GOOGLE_CLOUD_QUOTA_PROJECT"
// EnvGoogleCloudProject is consulted second.
EnvGoogleCloudProject = "GOOGLE_CLOUD_PROJECT"
// EnvGcloudProject is consulted last.
EnvGcloudProject = "GCLOUD_PROJECT"
)
// billingEnvChain is the documented env-var fallback order
// ResolveBillingProject walks, first match wins; pulled into a
// package-level var so the order stays self-documenting and tests
// assert against the same source of truth. It is only read, never
// modified, by ResolveBillingProject.
var billingEnvChain = []string{
EnvGoogleCloudQuotaProject,
EnvGoogleCloudProject,
EnvGcloudProject,
}
// ResolveBillingProject picks the GCP project the seeder bills its
// production reads against. Candidates are tried in order and the
// first one that is non-blank after trimming wins (the trimmed
// value is returned):
//
//  1. Request body's `billingProject`.
//  2. Gateway default project (--project-id).
//  3. $GOOGLE_CLOUD_QUOTA_PROJECT.
//  4. $GOOGLE_CLOUD_PROJECT.
//  5. $GCLOUD_PROJECT.
//  6. Source project (the read project itself).
//
// `getenv` is injected so tests don't depend on the process
// environment; the production caller passes os.LookupEnv. A nil
// getenv is treated as "no env vars set", which skips steps 3-5.
func ResolveBillingProject(req SeedRequest, gatewayDefault string, getenv func(string) (string, bool)) string {
	for _, explicit := range []string{req.BillingProject, gatewayDefault} {
		if candidate := strings.TrimSpace(explicit); candidate != "" {
			return candidate
		}
	}

	if getenv != nil {
		for _, name := range billingEnvChain {
			raw, set := getenv(name)
			if !set {
				continue
			}
			if candidate := strings.TrimSpace(raw); candidate != "" {
				return candidate
			}
		}
	}

	return strings.TrimSpace(req.Source.Project)
}
// ErrInvalidRequest is the sentinel SeedRequest.Validate and
// DecodeRequest wrap so callers can detect "this is a 400, not a
// 500" without string matching. Compare via errors.Is.
var ErrInvalidRequest = errors.New("seed: invalid request")
// ErrProductionUnsupported is the sentinel NewProductionReader
// reports when the build does not include a live production
// client. The default build intentionally ships without a hard
// dependency on cloud.google.com/go/bigquery so operators who only
// need the YAML seed loader (or the per-PR test runner) don't pay
// for it; the `seed_production_live` build tag swaps in the real
// implementation.
var ErrProductionUnsupported = errors.New("seed: production seeding is not compiled into this build")
// LookupEnvOrEmpty is the production env lookup used when the
// documented env chain needs to be consulted (see
// ResolveBillingProject); NewOrchestrator installs it as the
// default Orchestrator.EnvLookup. It is os.LookupEnv itself, so it
// has the same (value, ok) contract. Declared as a package-level
// variable rather than a direct call so it can be reassigned,
// e.g. by tests.
var LookupEnvOrEmpty = os.LookupEnv
// nowRFC3339 returns the current UTC time formatted as RFC 3339,
// the format the SeedResult timestamps use. It is a package-level
// func variable rather than a plain function so tests can replace
// it to pin time.
var nowRFC3339 = func() string {
	utcNow := time.Now().UTC()
	return utcNow.Format(time.RFC3339)
}
// DecodeRequest parses a JSON request body into a SeedRequest. The
// helper exists so the handler doesn't have to know how the decoder
// is configured:
//
//   - unknown fields are rejected (DisallowUnknownFields), so a typo
//     like "billing_project" surfaces as a 400 rather than silently
//     ignoring the operator's intent;
//   - anything other than whitespace after the first JSON value is
//     rejected, so a truncated paste or two concatenated bodies is
//     not half-accepted.
//
// Every failure wraps ErrInvalidRequest; on error the returned
// SeedRequest is the zero value. DecodeRequest does not call
// Validate.
func DecodeRequest(b []byte) (SeedRequest, error) {
	var req SeedRequest
	dec := json.NewDecoder(strings.NewReader(string(b)))
	dec.DisallowUnknownFields()
	if err := dec.Decode(&req); err != nil {
		return SeedRequest{}, fmt.Errorf("%w: %w", ErrInvalidRequest, err)
	}
	// Decode stops after the first complete JSON value and would
	// otherwise ignore whatever follows it. InputOffset points just
	// past that value, so the remainder must be blank.
	if rest := strings.TrimSpace(string(b[dec.InputOffset():])); rest != "" {
		return SeedRequest{}, fmt.Errorf("%w: unexpected data after JSON request body", ErrInvalidRequest)
	}
	return req, nil
}
package gateway
import (
"context"
"os"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
)
// newSeedRunner constructs the production seed.Runner the
// `POST /api/emulator/seed` handler dispatches to when
// EnableSeedAPI is true.
//
// The returned runner is a lazyProductionRunner: it builds its
// ProductionReader per request through seed.NewProductionReader.
// In the default build that constructor is the "unsupported" stub,
// so the seed handler still surfaces a clean 501 instead of a panic
// when the operator forgot to build with
// `-tags=seed_production_live`. The applier half is always real --
// it wraps the engine client's Catalog, which the gateway already
// uses for every REST handler.
//
// The runner closes over the engine client; callers must keep the
// engine subprocess alive for as long as they expect to service
// seed requests.
func newSeedRunner(opts Options, eng *engine.Client) seed.Runner {
	runner := new(lazyProductionRunner)
	runner.defaults = DefaultsFromOptions(opts)
	runner.applier = seed.NewCatalogApplier(eng.Catalog)
	runner.envLookup = os.LookupEnv
	return runner
}
// DefaultsFromOptions copies the seeding-relevant fields of an
// Options struct (default project id and default dataset location)
// into the small seed.Defaults shape the orchestrator and YAML
// loader consume. It lives in the gateway package (rather than
// gateway/seed) so the seed package never imports the gateway
// package, which would create an import cycle through the route
// registration in gateway/server.go.
func DefaultsFromOptions(o Options) seed.Defaults {
	var d seed.Defaults
	d.ProjectID = o.DefaultProjectID
	d.DatasetLocation = o.DefaultDatasetLocation
	return d
}
// lazyProductionRunner defers ProductionReader construction until
// a request actually arrives (see Run). That keeps gateway startup
// time cheap when nobody invokes the seed API and lets us surface
// ADC / quota / billing errors as a per-operation failure rather
// than a hard fail at gateway boot.
type lazyProductionRunner struct {
// defaults are the gateway-level fallbacks handed to each
// orchestrator; ProjectID also feeds billing resolution.
defaults seed.Defaults
// applier is the engine-facing sink shared by every request.
applier seed.Applier
// envLookup is passed to ResolveBillingProject, to
// NewProductionReader, and onto the orchestrator.
envLookup func(string) (string, bool)
}
// Run satisfies seed.Runner. Every invocation resolves the billing
// project, constructs its own production reader (one reader per
// seed invocation, so the cloud client's connection lifecycle stays
// scoped to one request), and runs the request through a fresh
// orchestrator.
//
// If the reader cannot be constructed -- in the default build
// seed.NewProductionReader always fails -- that error is returned
// unchanged and nothing is seeded.
func (l *lazyProductionRunner) Run(ctx context.Context, req seed.SeedRequest) (*seed.SeedResult, error) {
	lookup := l.envLookup
	billingProject := seed.ResolveBillingProject(req, l.defaults.ProjectID, lookup)

	reader, err := seed.NewProductionReader(ctx, billingProject, lookup)
	if err != nil {
		return nil, err
	}

	orchestrator := seed.NewOrchestrator(reader, l.applier, l.defaults)
	orchestrator.EnvLookup = lookup
	return orchestrator.Run(ctx, req)
}
package seedfile
import (
"context"
"fmt"
"strings"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
)
// applyTimeout bounds the total wall time ApplyFiles spends talking
// to the engine for one call (the deadline covers all files passed
// in that call, not each file separately). Large seeds (thousands
// of rows) easily fit inside this budget on a local engine; a value
// much smaller risks flaky startups on CI when the engine is still
// warming caches.
const applyTimeout = 2 * time.Minute
// ApplyFiles loads and applies each YAML seed file in order, under
// a single applyTimeout deadline derived from a background context.
// The loader fails fast on the first file that does not parse or
// validate so operators see the actual schema error rather than a
// stream of confusing follow-on failures.
//
// `defaults` carries the gateway-level fallbacks (project id, dataset
// location) used when a file omits them. `applier` is the engine-
// facing surface; in production this is seed.NewCatalogApplier over
// the live CatalogClient.
func ApplyFiles(paths []string, applier seed.Applier, defaults seed.Defaults) error {
	root := context.Background()
	ctx, cancel := context.WithTimeout(root, applyTimeout)
	defer cancel()

	err := ApplyFilesContext(ctx, paths, applier, defaults)
	return err
}
// ApplyFilesContext is the context-aware twin of ApplyFiles: it
// loads and applies the files in order and stops at the first
// failure. A Load error is returned as-is; an Apply error is wrapped
// with the offending file's path. Use this from tests so they can
// pass a short-deadline context to exercise cancellation behavior.
func ApplyFilesContext(ctx context.Context, paths []string, applier seed.Applier, defaults seed.Defaults) error {
	for i := range paths {
		path := paths[i]

		file, loadErr := Load(path)
		if loadErr != nil {
			return loadErr
		}

		applyErr := Apply(ctx, file, applier, defaults)
		if applyErr != nil {
			return fmt.Errorf("seedfile %s: %w", path, applyErr)
		}
	}
	return nil
}
// Apply materializes one decoded File against the engine via the
// supplied applier. A nil File is a no-op. The order is
// deterministic: datasets in the order they appear in the file,
// tables within a dataset in declaration order, rows in declaration
// order. Operators rely on this for reproducible seeds (e.g.
// autoincrement-style ids).
//
// Project id and location are each resolved through the precedence
// chain entry > file default > gateway default; a dataset with no
// resolvable project id is an error.
//
// "Already exists" is not an error at the dataset or table level:
// the applier reports created=false in that case. Rows are inserted
// only when the table was newly created, so gateway restarts against
// a persistent data_dir do not duplicate seed data.
//
// Apply stops at, and returns, the first applier error.
func Apply(ctx context.Context, f *File, applier seed.Applier, defaults seed.Defaults) error {
	if f == nil {
		return nil
	}

	for dsIdx, ds := range f.Datasets {
		projectID := firstNonEmpty(ds.ProjectID, f.DefaultProjectID, defaults.ProjectID)
		if projectID == "" {
			return fmt.Errorf(
				"datasets[%d] (id=%q): no project_id set (file default, dataset entry, and --project-id all empty)",
				dsIdx,
				ds.ID,
			)
		}

		loc := firstNonEmpty(ds.Location, f.DefaultLocation, defaults.DatasetLocation)
		_, dsErr := applier.EnsureDataset(ctx, projectID, ds.ID, loc)
		if dsErr != nil {
			return fmt.Errorf("ensure dataset %s.%s: %w", projectID, ds.ID, dsErr)
		}

		for tblIdx, tbl := range ds.Tables {
			schema := fieldsToProto(tbl.Schema)
			target := seed.TableRef{
				ProjectID: projectID,
				DatasetID: ds.ID,
				TableID:   tbl.ID,
			}

			isNew, tblErr := applier.EnsureTable(ctx, target, schema)
			if tblErr != nil {
				return fmt.Errorf("ensure table %s.%s.%s: %w",
					projectID, ds.ID, tbl.ID, tblErr)
			}

			// Only a freshly created table with declared rows
			// gets an insert.
			if isNew && len(tbl.Rows) > 0 {
				_, rowErr := applier.InsertRows(ctx, target, schema, tbl.Rows)
				if rowErr != nil {
					return fmt.Errorf("insert rows for %s.%s.%s (file datasets[%d].tables[%d]): %w",
						projectID, ds.ID, tbl.ID, dsIdx, tblIdx, rowErr)
				}
			}
		}
	}
	return nil
}
// firstNonEmpty scans vs left to right and returns the first value
// that is non-empty after whitespace trimming, in its trimmed form.
// It returns "" when every value is blank or vs is empty. Apply uses
// it to walk the (entry > file-default > gateway-default) precedence
// chain for project id and location.
func firstNonEmpty(vs ...string) string {
	for i := range vs {
		trimmed := strings.TrimSpace(vs[i])
		if trimmed == "" {
			continue
		}
		return trimmed
	}
	return ""
}
// fieldsToProto converts a YAML FieldSchema slice into the engine's
// proto TableSchema, one proto field per input field and in the same
// order. Nested fields are converted recursively by fieldToProto. The
// returned schema always carries a non-nil Fields slice, even for
// empty input.
func fieldsToProto(fields []FieldSchema) *enginepb.TableSchema {
	converted := make([]*enginepb.FieldSchema, 0, len(fields))
	for i := range fields {
		converted = append(converted, fieldToProto(fields[i]))
	}
	return &enginepb.TableSchema{Fields: converted}
}
// fieldToProto converts a single YAML FieldSchema (and, recursively,
// its nested Fields) into the proto form. Name, Mode and Description
// are copied as-is; a Type of RECORD (compared case-insensitively) is
// rewritten to STRUCT, any other type string is passed through
// unchanged.
func fieldToProto(f FieldSchema) *enginepb.FieldSchema {
	pf := &enginepb.FieldSchema{
		Name:        f.Name,
		Type:        f.Type,
		Mode:        f.Mode,
		Description: f.Description,
	}
	if strings.EqualFold(f.Type, "RECORD") {
		pf.Type = "STRUCT"
	}
	for i := range f.Fields {
		pf.Fields = append(pf.Fields, fieldToProto(f.Fields[i]))
	}
	return pf
}
package seedfile
import (
"path/filepath"
"regexp"
"slices"
"strings"
)
const (
	// PublicDataProject is the BigQuery project id thirdparty samples
	// use for public dataset queries.
	PublicDataProject = "bigquery-public-data"

	// PublicDataSeedRelPath is the repo-relative path to the bundled
	// YAML fixture. Docker copies it under /opt/bigquery-emulator/.
	PublicDataSeedRelPath = "testdata/public-data/bigquery-public-data.yaml"

	// PublicDataSeedContainerPath is where the runtime image installs
	// the fixture so gateway_main can pass --seed-data-file without
	// host mounts.
	PublicDataSeedContainerPath = "/opt/bigquery-emulator/testdata/public-data/bigquery-public-data.yaml"
)
// SeededPublicTables lists project.dataset.table resources the bundled
// fixture materializes. Skip matrices (python emulator_pytest_skip,
// third_party/README.md) treat only these refs as emulator-backed.
//
// Every entry is a fully qualified, dot-separated ref prefixed with
// PublicDataProject; PublicDataRefsFullySeeded and IsSeededPublicTable
// compare against these strings exactly.
//
// NOTE(review): this is an exported, mutable package-level slice;
// callers should treat it as read-only.
var SeededPublicTables = []string{
PublicDataProject + ".usa_names.usa_1910_2013",
PublicDataProject + ".usa_names.usa_1910_current",
PublicDataProject + ".samples.shakespeare",
PublicDataProject + ".github_repos.commits",
PublicDataProject + ".stackoverflow.posts_questions",
PublicDataProject + ".ml_datasets.penguins",
PublicDataProject + ".utility_us.country_code_iso",
}
// publicTableRefRE matches a bigquery-public-data table reference
// embedded in free text. After the literal project id it expects a
// dataset and a table, each introduced by either '.' or ':' and made
// of ASCII letters, digits, and underscores. Capture group 1 is the
// dataset id and group 2 is the table id.
var publicTableRefRE = regexp.MustCompile(
`bigquery-public-data[.:]([a-zA-Z0-9_]+)[.:]([a-zA-Z0-9_]+)`,
)
// PublicDataSeedPathFromRoot joins repoRoot with PublicDataSeedRelPath
// and returns the resulting location of the bundled fixture. The
// result is absolute only when repoRoot itself is absolute.
func PublicDataSeedPathFromRoot(repoRoot string) string {
	fixturePath := filepath.Join(repoRoot, PublicDataSeedRelPath)
	return fixturePath
}
// PublicDataRefsInText collects every bigquery-public-data table
// reference found in text (SQL or sample source) and returns them as
// a set of normalized, dot-separated project.dataset.table strings.
// References written with ':' separators are normalized to '.'. The
// returned map is never nil; it is empty when nothing matches.
func PublicDataRefsInText(text string) map[string]struct{} {
	matches := publicTableRefRE.FindAllStringSubmatch(text, -1)
	refs := map[string]struct{}{}
	for i := range matches {
		groups := matches[i]
		// groups[0] is the full match, [1] the dataset, [2] the table.
		if len(groups) >= 3 {
			refs[PublicDataProject+"."+groups[1]+"."+groups[2]] = struct{}{}
		}
	}
	return refs
}
// PublicDataRefsFullySeeded reports whether every bigquery-public-data
// table reference in text is covered by SeededPublicTables. Text that
// contains no such reference at all yields false.
func PublicDataRefsFullySeeded(text string) bool {
	found := PublicDataRefsInText(text)
	if len(found) == 0 {
		return false
	}
	for ref := range found {
		if !slices.Contains(SeededPublicTables, ref) {
			return false
		}
	}
	return true
}
// IsSeededPublicTable reports whether ref names one of the bundled
// tables in SeededPublicTables. Surrounding whitespace is ignored.
// ref may be project.dataset.table or dataset.table; when it does not
// start with "bigquery-public-data." that project prefix is added
// before the lookup.
func IsSeededPublicTable(ref string) bool {
	const projectPrefix = PublicDataProject + "."

	candidate := strings.TrimSpace(ref)
	if !strings.HasPrefix(candidate, projectPrefix) {
		candidate = projectPrefix + candidate
	}
	return slices.Contains(SeededPublicTables, candidate)
}
// Package seedfile loads a declarative YAML file at gateway startup
// and applies its datasets / tables / rows to the engine via the
// shared seed.Applier surface (gateway/seed).
//
// The YAML schema is intentionally close to the BigQuery REST API's
// dataset / table / row shape so operators who know one can read the
// other:
//
// project_id: dev # default project (optional; can also
// # set per-dataset)
// location: US # default location (optional)
// datasets:
// - id: ds
// project_id: dev # optional override
// location: US # optional override
// tables:
// - id: people
// schema:
// - {name: id, type: INT64, mode: REQUIRED}
// - {name: name, type: STRING}
// rows:
// - {id: 1, name: ada}
// - {id: 2, name: bob}
//
// The schema is the runtime seed schema; it is deliberately
// independent from the conformance/runner.Fixture format (which
// carries test-only expectations) so production seeding doesn't pick
// up assertions that have no meaning at runtime.
package seedfile
import (
	"bytes"
	"errors"
	"fmt"
	"io"
	"os"
	"strings"

	"gopkg.in/yaml.v3"
)
// File is the top-level YAML schema. Defaults at this level apply
// when a per-dataset field is empty.
type File struct {
// DefaultProjectID is the project a dataset belongs to when
// the dataset itself omits project_id. When both are empty
// the loader falls back to seed.Defaults.ProjectID (the
// gateway-level --project-id), and finally errors if even
// that is missing.
DefaultProjectID string `yaml:"project_id"`
// DefaultLocation is the BigQuery location stamped on a
// dataset when the dataset entry itself omits location. It
// is consulted before the gateway-level default location
// (seed.Defaults.DatasetLocation). Empty stays empty -- the
// engine will accept the dataset without a location and
// clients can read it back as such.
DefaultLocation string `yaml:"location"`
// Datasets enumerates the resources the loader will materialize,
// in the order they are applied.
Datasets []Dataset `yaml:"datasets"`
}
// Dataset describes one logical BigQuery dataset and the tables
// inside it.
type Dataset struct {
// ID is the dataset's BigQuery id. Required; Validate rejects a
// blank (whitespace-only) value.
ID string `yaml:"id"`
// ProjectID overrides the file-level default project for this
// dataset. Optional.
ProjectID string `yaml:"project_id"`
// Location overrides the file-level default location. Optional.
Location string `yaml:"location"`
// Tables is the per-dataset table list. May be empty so
// operators can pre-create empty datasets (matches BigQuery's
// "dataset without any tables" state).
Tables []Table `yaml:"tables"`
}
// Table mirrors the REST API's Table resource. Schemas are
// positional (the column order defines the row layout) and rows
// are key/value maps keyed by column name.
type Table struct {
// ID is the table's BigQuery id. Required; Validate rejects a
// blank (whitespace-only) value.
ID string `yaml:"id"`
// Schema enumerates the table's columns. Required for tables
// that include rows so the loader can lay cells out
// positionally (Validate enforces this). Empty schema is
// allowed for "register the table, no rows" workflows.
Schema []FieldSchema `yaml:"schema"`
// Rows is the per-table row list. Each row is a map keyed by
// column name; missing columns become NULL cells. Extra keys
// not in the schema are silently dropped, matching the
// `tabledata.insertAll` handler's behavior.
Rows []map[string]any `yaml:"rows"`
}
// FieldSchema mirrors enginepb.FieldSchema. We don't reuse the
// proto struct directly because the YAML decoder is happier with
// plain Go tags than with the generated protobuf struct.
type FieldSchema struct {
// Name is the column name. Required for top-level schema
// entries (checked by Validate).
Name string `yaml:"name"`
// Type is the BigQuery type name (STRING, INT64, BOOL, ...).
// Required for top-level schema entries (checked by Validate).
Type string `yaml:"type"`
// Mode is one of NULLABLE | REQUIRED | REPEATED. Empty
// defaults to NULLABLE on the engine side.
Mode string `yaml:"mode"`
// Description is a free-form column description. Optional.
Description string `yaml:"description"`
// Fields holds nested STRUCT/RECORD fields. Walked
// recursively when present.
Fields []FieldSchema `yaml:"fields"`
}
// Load reads the YAML seed file at path and hands its bytes to
// Decode, using path as the source label in any error. Decoding is
// strict about unknown keys, so a typo (e.g. `projects:` instead of
// `datasets:`) surfaces as an error rather than silently producing an
// empty seed.
//
// path is operator-supplied via --seed-data-file; the gosec G304
// warning is expected (reading a caller-named path is the whole point
// of this helper) and is suppressed inline.
func Load(path string) (*File, error) {
	raw, readErr := os.ReadFile(path) //nolint:gosec // path is the operator-supplied --seed-data-file
	if readErr != nil {
		return nil, fmt.Errorf("seedfile: read %s: %w", path, readErr)
	}
	return Decode(raw, path)
}
// Decode parses YAML bytes into a File and validates the result.
//
// source is only used to label error messages; pass the originating
// path when available, "" for in-memory inputs (reported as
// "<input>").
//
// Decoding is strict: keys that do not map to a struct field are a
// parse error. Input that contains no YAML document at all (the
// decoder reports io.EOF) is treated as a valid no-op and yields an
// empty, non-nil *File with a nil error. Any other decode failure is
// returned wrapped as "seedfile: parse ...", and a structural problem
// found by Validate is returned wrapped as "seedfile: validate ...".
func Decode(data []byte, source string) (*File, error) {
	var f File
	// Read straight from the byte slice; no need to copy it into a
	// string first.
	dec := yaml.NewDecoder(bytes.NewReader(data))
	dec.KnownFields(true)

	switch err := dec.Decode(&f); {
	case err == nil, errors.Is(err, ErrEmptyFile):
		// Decoded (or explicitly tolerated); continue to validation.
	case errors.Is(err, io.EOF):
		// io.EOF from gopkg.in/yaml.v3 means the input held no
		// document -- an effectively empty file. Match on the
		// sentinel rather than on the error text.
		return &File{}, nil
	default:
		return nil, fmt.Errorf("seedfile: parse %s: %w", labelSource(source), err)
	}

	if err := f.Validate(); err != nil {
		return nil, fmt.Errorf("seedfile: validate %s: %w", labelSource(source), err)
	}
	return &f, nil
}
// ErrEmptyFile is the sentinel for "the seed input was empty";
// detect it with errors.Is.
//
// NOTE(review): Decode in this file answers empty input with an
// empty *File and a nil error, and only tolerates this sentinel if
// the YAML decoder were to return it -- it does not produce it
// itself. Confirm whether any other code path returns this error
// before relying on it.
var ErrEmptyFile = errors.New("seedfile: empty input")
// labelSource picks the name used for an input in error messages: s
// itself when it is non-empty, otherwise the placeholder "<input>".
func labelSource(s string) string {
	if s != "" {
		return s
	}
	return "<input>"
}
// Validate performs cheap structural checks on a decoded File before
// the loader contacts the engine. It returns the first problem found,
// phrased as a path to the offending field so operator fixes are
// quick, or nil when the file passes.
//
// Checked, in order, per dataset and table: a non-blank dataset id, a
// non-blank table id, a schema whenever rows are present, and a
// non-blank name and type on every top-level schema entry. Nested
// FieldSchema.Fields are not inspected here.
func (f *File) Validate() error {
	for i := range f.Datasets {
		ds := &f.Datasets[i]
		if strings.TrimSpace(ds.ID) == "" {
			return fmt.Errorf("datasets[%d].id is required", i)
		}

		for j := range ds.Tables {
			tbl := &ds.Tables[j]
			switch {
			case strings.TrimSpace(tbl.ID) == "":
				return fmt.Errorf("datasets[%d].tables[%d].id is required", i, j)
			case len(tbl.Rows) > 0 && len(tbl.Schema) == 0:
				return fmt.Errorf("datasets[%d].tables[%d].schema is required when rows are present",
					i, j)
			}

			for k := range tbl.Schema {
				col := &tbl.Schema[k]
				switch {
				case strings.TrimSpace(col.Name) == "":
					return fmt.Errorf("datasets[%d].tables[%d].schema[%d].name is required",
						i, j, k)
				case strings.TrimSpace(col.Type) == "":
					return fmt.Errorf("datasets[%d].tables[%d].schema[%d].type is required",
						i, j, k)
				}
			}
		}
	}
	return nil
}
package gateway
import (
"log/slog"
"net/http"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/handlers"
"github.com/vantaboard/bigquery-emulator/gateway/handlers/datatransfer"
"github.com/vantaboard/bigquery-emulator/gateway/middleware"
"github.com/vantaboard/bigquery-emulator/gateway/seed"
"github.com/vantaboard/bigquery-emulator/gateway/sqltools"
)
// NewServer builds the HTTP handler tree that implements the BigQuery
// REST surface and returns it wrapped in the gateway middleware
// stack. Routes use Go 1.22+ method-aware mux patterns.
//
// The routes mirror the public BigQuery v2 REST API. The canonical
// emulator-side mapping (handler pointers and status) lives in
// docs/REST_API.md; the upstream documentation it is cross-checked
// against lives under docs/bigquery/docs/reference/rest/v2/.
//
// Every endpoint listed in docs/REST_API.md is registered, even when
// its handler currently answers http.StatusNotImplemented. That gives
// client libraries a stable surface to probe and lets handlers be
// flipped from stub to real one resource at a time, as the
// gateway-HTTP-surface section of ROADMAP.md prescribes.
//
// Custom-method endpoints (the AIP-136 "{resource}:operation" shape
// used by datasets.undelete and the three tables IAM endpoints)
// cannot be expressed in net/http's mux pattern syntax, which
// requires every wildcard segment to end with `}`. For those the
// parent path is registered and the handler dispatches on the
// trailing `:op`.
func NewServer(opts Options, deps handlers.Dependencies, eng *engine.Client) http.Handler {
	router := http.NewServeMux()

	// Liveness probes: the exact root and /healthz share one handler.
	for _, pattern := range []string{"GET /{$}", "GET /healthz"} {
		router.HandleFunc(pattern, handlers.Health)
	}
	// Catch-all for anything no more specific pattern claims.
	router.HandleFunc("/", handlers.NotFound)
	router.HandleFunc("GET /discovery/v1/apis/bigquery/v2/rest", handlers.Discovery(deps))

	mountBigQueryV2(router, deps)
	mountMigration(router, deps)
	mountDataTransfer(router)
	mountSeedAPI(router, opts, eng)
	mountSQLToolsAPI(router, opts, eng)

	return wrapMiddleware(opts, router)
}
// mountBigQueryV2 registers every BigQuery v2 endpoint twice: under
// the `/bigquery/v2/...` prefix (what gcloud, bq, and clients pointed
// at real `*.googleapis.com` use) and under the bare `/...` form.
//
// The bare form exists because the official client libraries treat
// BIGQUERY_EMULATOR_HOST as the verbatim baseUrl with no version
// segment — for example @google-cloud/bigquery v8's bigquery.js sets
// `baseUrl = EMULATOR_HOST || ${apiEndpoint}/bigquery/v2`, so a
// client configured via BIGQUERY_EMULATOR_HOST issues
// `POST /projects/{p}/queries` (no `/bigquery/v2`). Registering both
// forms keeps the public REST surface working for both invocation
// styles without a StripPrefix middleware that would have to fork on
// the other top-level prefixes (`/discovery/...`, `/upload/...`,
// `/v2alpha/...`, `/v2/...`, `/v1/...`, `/healthz`).
func mountBigQueryV2(mux *http.ServeMux, deps handlers.Dependencies) {
	// dual registers one handler under the versioned pattern first,
	// then under the bare pattern.
	var dual mountFunc = func(method, path string, h http.HandlerFunc) {
		versioned := method + " /bigquery/v2" + path
		bare := method + " " + path
		mux.HandleFunc(versioned, h)
		mux.HandleFunc(bare, h)
	}

	mountProjectsAndDatasets(dual, deps)
	mountTables(dual, deps)
	mountModelsAndRoutines(dual, deps)
	mountJobsAndQueries(mux, dual, deps)
}
// mountFunc is the per-method mounting helper used by the BigQuery
// v2 sub-mounters. It takes an HTTP method, a path pattern without
// any version prefix, and the handler to serve it. The
// implementation supplied by mountBigQueryV2 registers the handler
// under both `/bigquery/v2` and bare-prefix mux patterns (see the
// mountBigQueryV2 doc-comment for why the bare form is required).
type mountFunc = func(method, path string, h http.HandlerFunc)
// mountProjectsAndDatasets registers the projects.* and datasets.*
// endpoints through mount, in the order listed in the table below.
// The final POST on the dataset resource path serves datasets.undelete
// (POST /datasets/{datasetId}:undelete), which its handler dispatches
// on the trailing `:undelete` captured by the wildcard.
func mountProjectsAndDatasets(mount mountFunc, deps handlers.Dependencies) {
	routes := []struct {
		method  string
		path    string
		handler http.HandlerFunc
	}{
		{"GET", "/projects", handlers.ProjectList(deps)},
		{"GET", "/projects/{projectId}/serviceAccount", handlers.ProjectGetServiceAccount(deps)},
		{"GET", "/projects/{projectId}/datasets", handlers.DatasetList(deps)},
		{"POST", "/projects/{projectId}/datasets", handlers.DatasetInsert(deps)},
		{"GET", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetGet(deps)},
		{"PUT", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetUpdate(deps)},
		{"PATCH", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetPatch(deps)},
		{"DELETE", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetDelete(deps)},
		// datasets.undelete custom method, dispatched in-handler.
		{"POST", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetCustomMethodPOST(deps)},
	}
	for _, rt := range routes {
		mount(rt.method, rt.path, rt.handler)
	}
}
// mountTables registers tables.*, tabledata.*, and the table-scoped
// rowAccessPolicies surface through mount, in the order listed in the
// table below. The trailing `:getIamPolicy` / `:setIamPolicy` /
// `:testIamPermissions` custom methods share the POST on the table
// resource path and are dispatched in-handler because Go's mux can't
// match them directly.
func mountTables(mount mountFunc, deps handlers.Dependencies) {
	routes := []struct {
		method  string
		path    string
		handler http.HandlerFunc
	}{
		{"GET", "/projects/{projectId}/datasets/{datasetId}/tables", handlers.TableList(deps)},
		{"POST", "/projects/{projectId}/datasets/{datasetId}/tables", handlers.TableInsert(deps)},
		{"GET", "/projects/{projectId}/datasets/{datasetId}/tables/{tableId}", handlers.TableGet(deps)},
		{"PUT", "/projects/{projectId}/datasets/{datasetId}/tables/{tableId}", handlers.TableUpdate(deps)},
		{"PATCH", "/projects/{projectId}/datasets/{datasetId}/tables/{tableId}", handlers.TablePatch(deps)},
		{"DELETE", "/projects/{projectId}/datasets/{datasetId}/tables/{tableId}", handlers.TableDelete(deps)},
		// Table custom methods (IAM), dispatched in-handler.
		{
			"POST",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}",
			handlers.TableCustomMethodPOST(deps),
		},
		{
			"GET",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/data",
			handlers.TableDataList(deps),
		},
		{
			"POST",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/insertAll",
			handlers.TableDataInsertAll(deps),
		},
		// Row-access policies (table-scoped row-level security).
		{
			"GET",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies",
			handlers.RowAccessPolicyDispatch(deps),
		},
		{
			"POST",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies",
			handlers.RowAccessPolicyDispatch(deps),
		},
		{
			"GET",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies/{policyId}",
			handlers.RowAccessPolicyDispatch(deps),
		},
		{
			"PUT",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies/{policyId}",
			handlers.RowAccessPolicyDispatch(deps),
		},
		{
			"DELETE",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies/{policyId}",
			handlers.RowAccessPolicyDispatch(deps),
		},
		{
			"POST",
			"/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies/{policyId}",
			handlers.RowAccessPolicyDispatch(deps),
		},
	}
	for _, rt := range routes {
		mount(rt.method, rt.path, rt.handler)
	}
}
// mountModelsAndRoutines registers the BQML model endpoints and the
// routines (UDF / TVF / stored procedure) endpoints through mount, in
// the order listed in the table below. BQML has no engine backing;
// routines delegate to the engine catalog when wired to emulator_main
// and mirror metadata in the gateway routines store for timestamps
// and DDL paths.
func mountModelsAndRoutines(mount mountFunc, deps handlers.Dependencies) {
	routes := []struct {
		method  string
		path    string
		handler http.HandlerFunc
	}{
		{"GET", "/projects/{projectId}/datasets/{datasetId}/models", handlers.ModelList(deps)},
		{"GET", "/projects/{projectId}/datasets/{datasetId}/models/{modelId}", handlers.ModelGet(deps)},
		{"PATCH", "/projects/{projectId}/datasets/{datasetId}/models/{modelId}", handlers.ModelPatch(deps)},
		{"DELETE", "/projects/{projectId}/datasets/{datasetId}/models/{modelId}", handlers.ModelDelete(deps)},
		{"GET", "/projects/{projectId}/datasets/{datasetId}/routines", handlers.RoutineList(deps)},
		{"POST", "/projects/{projectId}/datasets/{datasetId}/routines", handlers.RoutineInsert(deps)},
		{"GET", "/projects/{projectId}/datasets/{datasetId}/routines/{routineId}", handlers.RoutineGet(deps)},
		{"PUT", "/projects/{projectId}/datasets/{datasetId}/routines/{routineId}", handlers.RoutineUpdate(deps)},
		{"DELETE", "/projects/{projectId}/datasets/{datasetId}/routines/{routineId}", handlers.RoutineDelete(deps)},
	}
	for _, rt := range routes {
		mount(rt.method, rt.path, rt.handler)
	}
}
// mountJobsAndQueries registers jobs.* (including the media-upload
// variant of jobs.insert) and the synchronous queries.* endpoints.
// Regular endpoints go through mount; the upload endpoints are
// registered on mux directly. The trailing `/delete` on jobs.delete
// is not a typo; see
// docs/bigquery/docs/reference/rest/v2/jobs/delete.md.
func mountJobsAndQueries(mux *http.ServeMux, mount mountFunc, deps handlers.Dependencies) {
	mount("GET", "/projects/{projectId}/jobs", handlers.JobList(deps))
	mount("POST", "/projects/{projectId}/jobs", handlers.JobInsert(deps))

	// jobs.insert media-upload variant. The upload prefix is fixed by
	// the public BigQuery API and the client libraries hardcode it, so
	// only the `/upload/bigquery/v2/...` form is registered here.
	for _, pattern := range []string{
		"POST /upload/bigquery/v2/projects/{projectId}/jobs",
		"PUT /upload/bigquery/v2/projects/{projectId}/jobs",
	} {
		mux.HandleFunc(pattern, handlers.JobInsertUpload(deps))
	}

	perJobAndQueries := []struct {
		method  string
		path    string
		handler http.HandlerFunc
	}{
		{"GET", "/projects/{projectId}/jobs/{jobId}", handlers.JobGet(deps)},
		{"POST", "/projects/{projectId}/jobs/{jobId}/cancel", handlers.JobCancel(deps)},
		{"DELETE", "/projects/{projectId}/jobs/{jobId}/delete", handlers.JobDelete(deps)},
		{"POST", "/projects/{projectId}/queries", handlers.QueryRun(deps)},
		{"GET", "/projects/{projectId}/queries/{jobId}", handlers.QueryGetResults(deps)},
	}
	for _, rt := range perJobAndQueries {
		mount(rt.method, rt.path, rt.handler)
	}
}
// mountMigration registers the BigQuery Migration workflows surface
// under both the v2alpha and v2 path prefixes. The official client
// libraries read BIGQUERY_MIGRATION_EMULATOR_HOST and fall back to
// BIGQUERY_EMULATOR_HOST, so this gateway covers both surfaces from
// the same listener. List returns the empty page so startup probes
// succeed; create/start/get/delete return the documented 404/501.
// See gateway/handlers/migration.go.
func mountMigration(mux *http.ServeMux, deps handlers.Dependencies) {
	for _, apiVersion := range []string{"v2alpha", "v2"} {
		collection := "/" + apiVersion + "/projects/{projectId}/locations/{location}/workflows"
		item := collection + "/{workflowId}"

		mux.HandleFunc("GET "+collection, handlers.MigrationWorkflowList(deps))
		mux.HandleFunc("POST "+collection, handlers.MigrationWorkflowCreate(deps))
		mux.HandleFunc("GET "+item, handlers.MigrationWorkflowGet(deps))
		mux.HandleFunc("DELETE "+item, handlers.MigrationWorkflowDelete(deps))
		// AIP-136 custom methods (only :start today) — Go's mux can't
		// match `{workflowId}:start` directly, so dispatch in-handler.
		mux.HandleFunc("POST "+item, handlers.MigrationWorkflowCustomMethodPOST(deps))
	}
}
// mountDataTransfer registers the BigQuery Data Transfer Service v1
// surface by creating a datatransfer handler (constructed with a nil
// argument) and letting it register its own routes on mux. The
// shallow-emulator port per docs/ENGINE_POLICY.md replaces the empty
// shell that lived in gateway/handlers/data_transfer.go: dataSources
// catalog (`scheduled_query`, `amazon_s3`), in-memory CRUD for
// transferConfigs + transferRuns, and the AIP-136 custom methods
// (`scheduleRuns`, `checkValidCreds`, `startManualRuns`).
func mountDataTransfer(mux *http.ServeMux) {
	transferHandler := datatransfer.NewHandler(nil)
	transferHandler.Register(mux)
}
// mountSeedAPI registers the seed API routes, and does so only when
// opts.EnableSeedAPI is set (--enable-seed-api). The routes refuse
// non-loopback callers by default; an operator who needs CI/CD reach
// must combine `--seed-api-allow-remote` with `--seed-api-seed-token`
// for the documented defense-in-depth posture.
//
// The Runner stays nil when eng is nil because the default build does
// not link cloud.google.com/go/bigquery; building with
// `-tags=seed_production_live` adds the production runner. With a nil
// Runner the POST handler returns 501 with the documented
// "use --seed-data-file" message so operators see a meaningful error
// instead of a hung op.
func mountSeedAPI(mux *http.ServeMux, opts Options, eng *engine.Client) {
	if !opts.EnableSeedAPI {
		return
	}

	// Keep the interface value nil (not a typed nil) when there is no
	// engine client to run against.
	var seedRunner seed.Runner
	if eng != nil {
		seedRunner = newSeedRunner(opts, eng)
	}

	access := seed.AccessConfig{
		AllowRemote: opts.SeedAPIAllowRemote,
		Token:       opts.SeedAPISeedToken,
	}
	handlerDeps := seed.HandlerDeps{
		Access: access,
		Store:  seed.NewStore(),
		Runner: seedRunner,
	}
	seed.RegisterRoutes(mux, handlerDeps)
}
// mountSQLToolsAPI registers the POST /api/emulator/sql/* routes when
// opts.EnableSQLToolsAPI is set; otherwise it leaves mux untouched.
// The remote-access switch and token come from opts, and eng is
// passed through as the client the routes use.
func mountSQLToolsAPI(mux *http.ServeMux, opts Options, eng *engine.Client) {
	if opts.EnableSQLToolsAPI {
		access := sqltools.AccessConfig{
			AllowRemote: opts.SQLToolsAPIAllowRemote,
			Token:       opts.SQLToolsAPISeedToken,
		}
		sqltools.RegisterRoutes(mux, sqltools.HandlerDeps{
			Access: access,
			Client: eng,
		})
	}
}
// wrapMiddleware layers the gateway's standing middleware on top of
// the raw mux and returns the handler the gateway listens on. Layers
// are applied innermost first: gunzip, auth, loopback tag, method
// override, and finally — only when opts.LogRequests is set — the
// structured request log.
func wrapMiddleware(opts Options, mux http.Handler) http.Handler {
	// Gunzip is the innermost layer, so handlers see the inflated
	// JSON body. The Java BigQuery client sets
	// `Content-Encoding: gzip` on every POST/PUT/PATCH by default;
	// without this the gateway's JSON decoders trip on the gzip
	// framing magic byte (`\x1f`) and emit
	// `invalid character '\x1f' looking for beginning of value`.
	// See gateway/middleware/gunzip.go for the contract.
	stack := middleware.WithGunzipRequestBody(mux)

	// Auth always runs: it parses (but never validates) the
	// Authorization header and attaches a synthetic principal to the
	// request context. Per docs/REST_API.md and the
	// gateway-HTTP-surface section of ROADMAP.md, the emulator must
	// never 401, so this is permissive by design.
	stack = middleware.WithAuth(stack)

	// The loopback tag always runs: it records whether the request
	// arrived from a loopback caller so handlers can gate
	// emulator-internal debug fields on it. The single user today is
	// the synchronous query handler, which surfaces
	// `Job.statistics.query.emulatorRoute` (the C++ coordinator's
	// canonical route disposition string) only to loopback callers
	// per `docs/ENGINE_POLICY.md`.
	stack = middleware.WithLoopbackTag(stack)

	// X-HTTP-Method-Override translation wraps every layer above
	// (auth/loopback/gunzip all ignore the verb, the mux dispatches
	// on it) but sits INSIDE the access-log layer below, so the log
	// line reflects the original `POST` the client put on the wire.
	// The Java google-http-client `MethodOverride` interceptor that
	// ships enabled in every Google Cloud Java SDK uses this header
	// to tunnel PATCH/PUT/DELETE through POST (Java `URLConnection`
	// historically rejected `setRequestMethod("PATCH")`), which is
	// why the BigQuery Java sample `AuthorizeDatasetIT` was 405-ing
	// on `POST /projects/{p}/datasets/{d}` until tp05. See
	// `gateway/middleware/method_override.go` for the contract.
	stack = middleware.WithMethodOverride(stack)

	if !opts.LogRequests {
		return stack
	}

	// Request logging was asked for; fall back to a discarding logger
	// when none was configured.
	requestLog := opts.Logger
	if requestLog == nil {
		requestLog = slog.New(slog.DiscardHandler)
	}
	return loggingMiddleware(requestLog, stack)
}
// loggingMiddleware wraps next so that every completed HTTP request
// is reported as one structured slog event named "request" carrying
// the method, request URI, response status, and duration. Passing the
// request line as key/value pairs (instead of
// `log.Printf("%s %s ...", ...)`) keeps the logger's typed-value path
// between gateway and handler, defangs gosec G706's log-injection
// finding (the attacker-controlled URI never lands in a format-string
// position), and lets operators ship the JSON output to structured
// backends.
func loggingMiddleware(logger *slog.Logger, next http.Handler) http.Handler {
	serve := func(w http.ResponseWriter, r *http.Request) {
		began := time.Now()

		// Capture the verb as it arrived on the wire BEFORE any
		// downstream middleware rewrites it. WithMethodOverride (see
		// wrapMiddleware) replaces `r.Method` in place when a POST
		// tunnels a PATCH/PUT/DELETE via X-HTTP-Method-Override, so
		// reading `r.Method` after next.ServeHTTP would log the
		// rewritten verb and lose the literal POST the operator
		// actually needs to see in the access log when debugging a
		// 405 / route-mismatch.
		wireMethod := r.Method

		// Status defaults to 200 for handlers that never call
		// WriteHeader explicitly.
		recorder := &statusRecorder{ResponseWriter: w, status: http.StatusOK}
		next.ServeHTTP(recorder, r)

		logger.InfoContext(r.Context(), "request",
			slog.String("method", wireMethod),
			slog.String("uri", r.URL.RequestURI()),
			slog.Int("status", recorder.status),
			slog.Duration("dur", time.Since(began)),
		)
	}
	return http.HandlerFunc(serve)
}
// statusRecorder wraps an http.ResponseWriter and remembers the
// status code of the response so loggingMiddleware can report it once
// the handler returns. Construct it with status preset to
// http.StatusOK so handlers that never write anything are logged as
// 200.
type statusRecorder struct {
	http.ResponseWriter
	// status is the code to report in the access log.
	status int
	// wroteHeader is set once the final response header has been
	// committed, either explicitly through WriteHeader or implicitly
	// by the first Write. Later WriteHeader calls are ignored by
	// net/http and must not change the recorded status.
	wroteHeader bool
}

// WriteHeader records code and forwards the call to the wrapped
// writer. Only the first final status is recorded: net/http sends
// that one and treats later calls as superfluous, so recording a
// later code would make the access log disagree with the wire.
// Informational 1xx codes (other than 101 Switching Protocols) do
// not commit the header and may be followed by the final status.
func (s *statusRecorder) WriteHeader(code int) {
	if !s.wroteHeader {
		s.status = code
		informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
		if !informational {
			s.wroteHeader = true
		}
	}
	s.ResponseWriter.WriteHeader(code)
}

// Write forwards p to the wrapped writer. A Write that happens before
// any final WriteHeader commits an implicit 200 OK, which is recorded
// here so a late WriteHeader cannot overwrite it.
func (s *statusRecorder) Write(p []byte) (int, error) {
	if !s.wroteHeader {
		s.status = http.StatusOK
		s.wroteHeader = true
	}
	return s.ResponseWriter.Write(p)
}

// Unwrap exposes the wrapped writer so http.ResponseController can
// reach optional capabilities (flushing, deadlines, hijacking) that
// the embedded interface alone would hide.
func (s *statusRecorder) Unwrap() http.ResponseWriter {
	return s.ResponseWriter
}
// Package session is the gateway-side, in-memory BigQuery session registry.
// Sessions are minted when a query job requests createSession=true and are
// reattached on follow-up queries that pass connectionProperties session_id.
package session
import (
"crypto/rand"
"encoding/hex"
"sync"
"github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)
// Store tracks server-generated session ids for the lifetime of the gateway
// process. State is volatile; restarts wipe the table. A nil *Store is
// accepted by Resolve, which then reports no session.
type Store struct {
// mu guards byID.
mu sync.RWMutex
// byID maps a session id to the project/location it was first registered with.
byID map[string]record
}
// record is the per-session data kept by Store: the project id and
// location supplied when the session id was first registered.
type record struct {
projectID string
location string
}
// NewStore creates a session registry with no sessions in it, ready
// for use.
func NewStore() *Store {
	store := &Store{}
	store.byID = make(map[string]record)
	return store
}
// Resolve returns the SessionInfo for a query/job, or nil when the
// request is not session-bound (or the store itself is nil).
//
// When createSession is set a fresh session id is minted and
// registered. Otherwise, when connProps carries a non-empty
// session_id, that id is registered (if not already known) and echoed
// back. createSession takes precedence over a supplied session_id.
func (s *Store) Resolve(
	projectID, location string,
	createSession bool,
	connProps []bqtypes.ConnectionProperty,
) *bqtypes.SessionInfo {
	switch {
	case s == nil:
		return nil
	case createSession:
		return s.mint(projectID, location)
	}

	sid := connectionSessionID(connProps)
	if sid == "" {
		return nil
	}
	s.register(sid, projectID, location)
	return &bqtypes.SessionInfo{SessionID: sid}
}
// mint generates a new random session id, registers it for the given
// project and location, and returns it as a SessionInfo.
func (s *Store) mint(projectID, location string) *bqtypes.SessionInfo {
	sessionID := newSessionID()
	s.register(sessionID, projectID, location)

	info := &bqtypes.SessionInfo{SessionID: sessionID}
	return info
}
// register stores id with its project and location under the write
// lock. An id that is already present keeps its original record; the
// new values are ignored.
func (s *Store) register(id, projectID, location string) {
	s.mu.Lock()
	defer s.mu.Unlock()

	if _, known := s.byID[id]; known {
		return
	}
	s.byID[id] = record{projectID: projectID, location: location}
}
// connectionSessionID returns the value of the first connection
// property whose key is exactly "session_id" and whose value is
// non-empty, or "" when there is none.
func connectionSessionID(props []bqtypes.ConnectionProperty) string {
	for i := range props {
		prop := props[i]
		if prop.Key != "session_id" || prop.Value == "" {
			continue
		}
		return prop.Value
	}
	return ""
}
// newSessionID returns a new session id: 16 bytes read from
// crypto/rand, rendered as 32 lowercase hexadecimal characters.
func newSessionID() string {
	var raw [16]byte
	// The result of rand.Read is deliberately discarded, as in the
	// original implementation.
	_, _ = rand.Read(raw[:])
	return hex.EncodeToString(raw[:])
}
// Package snapshots retains soft-deleted table data so COPY jobs can
// read snapshot decorators (table@epoch) for undelete samples.
package snapshots
import (
"context"
"fmt"
"slices"
"strconv"
"strings"
"sync"
"time"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
// defaultPageSize is a row-count page size (10,000).
// NOTE(review): no use is visible in this part of the file; presumably
// it is the page size listAllRows requests when reading a table for a
// snapshot -- confirm against that helper.
const defaultPageSize = 10_000
// Entry is a point-in-time capture of a table's schema and rows,
// taken by CaptureBeforeDelete.
type Entry struct {
// Schema is the table schema as described at capture time.
Schema *enginepb.TableSchema
// Rows holds the table's rows as listed at capture time.
Rows []*enginepb.DataRow
// CreationTimeMs is the table's recorded creation time in Unix
// milliseconds, or the capture time when none was recorded.
CreationTimeMs int64
// DeletionTimeMs is the capture time in Unix milliseconds.
DeletionTimeMs int64
}
// Store retains deleted-table snapshots and live-table creation times
// for snapshot decorator resolution. Both maps are keyed by the string
// tableKey builds from project, dataset, and table id. Methods accept
// a nil *Store.
type Store struct {
// mu guards creationTimes and deleted.
mu sync.RWMutex
// creationTimes holds the creation epoch (ms) of each live table.
creationTimes map[string]int64
// deleted holds, per table key, the captured entries in capture order.
deleted map[string][]Entry
}
// NewStore creates a snapshot store with no recorded creation times
// and no captured entries, ready for use.
func NewStore() *Store {
	store := new(Store)
	store.creationTimes = make(map[string]int64)
	store.deleted = make(map[string][]Entry)
	return store
}
// tableKey builds the map key used by Store for one table, in the
// form "project:dataset.table".
func tableKey(projectID, datasetID, tableID string) string {
	datasetPart := projectID + ":" + datasetID
	return datasetPart + "." + tableID
}
// RecordCreation stamps the creation time (Unix milliseconds) for a
// live table. It is called when a table is first registered so
// tables.get returns a stable epoch: a table that already has a
// recorded time keeps it. A nil store is a no-op.
func (s *Store) RecordCreation(projectID, datasetID, tableID string, createdMs int64) {
	if s == nil {
		return
	}
	key := tableKey(projectID, datasetID, tableID)

	s.mu.Lock()
	defer s.mu.Unlock()

	if _, recorded := s.creationTimes[key]; recorded {
		return
	}
	s.creationTimes[key] = createdMs
}
// CreationTimeMs looks up the recorded creation epoch (Unix
// milliseconds) for a live table. The boolean is false when nothing
// is recorded for the table or the store is nil.
func (s *Store) CreationTimeMs(projectID, datasetID, tableID string) (int64, bool) {
	if s == nil {
		return 0, false
	}
	key := tableKey(projectID, datasetID, tableID)

	s.mu.RLock()
	defer s.mu.RUnlock()

	createdMs, found := s.creationTimes[key]
	return createdMs, found
}
// CaptureBeforeDelete snapshots a table's schema and rows so they
// remain available after the table is dropped. Call it before
// DropTable.
//
// The table is described and all of its rows are listed through
// catalog; the result is appended to the table's deleted entries with
// the current UTC time as the deletion time. The entry's creation
// time is the recorded creation time, or the deletion time when none
// (or zero) was recorded. The table's recorded creation time is then
// forgotten.
//
// A nil store or nil catalog is a no-op. A describe failure is
// returned wrapped; a row-listing failure is returned unchanged. In
// either case nothing is stored.
func (s *Store) CaptureBeforeDelete(ctx context.Context, catalog enginepb.CatalogClient,
	projectID, datasetID, tableID string,
) error {
	if s == nil || catalog == nil {
		return nil
	}

	target := &enginepb.TableRef{
		ProjectId: projectID,
		DatasetId: datasetID,
		TableId:   tableID,
	}
	described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: target})
	if err != nil {
		return fmt.Errorf("describe table for snapshot: %w", err)
	}
	schema := described.GetSchema()

	rows, err := listAllRows(ctx, catalog, target, schema)
	if err != nil {
		return err
	}

	// Timestamp after the engine calls but before taking the lock.
	deletedAt := time.Now().UTC().UnixMilli()
	key := tableKey(projectID, datasetID, tableID)

	s.mu.Lock()
	defer s.mu.Unlock()

	createdAt := s.creationTimes[key]
	if createdAt == 0 {
		createdAt = deletedAt
	}
	snapshot := Entry{
		Schema:         schema,
		Rows:           rows,
		CreationTimeMs: createdAt,
		DeletionTimeMs: deletedAt,
	}
	s.deleted[key] = append(s.deleted[key], snapshot)
	delete(s.creationTimes, key)
	return nil
}
// ResolveAtEpoch finds the snapshot of a deleted table that was alive at
// epochMs, i.e. whose [CreationTimeMs, DeletionTimeMs] range contains it.
// Snapshots are scanned newest first, so the most recently captured match
// wins. The returned *Entry points at a copy of the stored entry. An error
// is returned when no snapshot matches or when the receiver is nil.
func (s *Store) ResolveAtEpoch(projectID, datasetID, tableID string, epochMs int64,
) (*Entry, error) {
	if s == nil {
		return nil, fmt.Errorf("table %s.%s.%s@%d not found (snapshot store unavailable)",
			projectID, datasetID, tableID, epochMs)
	}

	s.mu.RLock()
	defer s.mu.RUnlock()

	snapshots := s.deleted[tableKey(projectID, datasetID, tableID)]
	for i := len(snapshots) - 1; i >= 0; i-- {
		if epochMs < snapshots[i].CreationTimeMs || epochMs > snapshots[i].DeletionTimeMs {
			continue
		}
		match := snapshots[i]
		return &match, nil
	}
	return nil, fmt.Errorf("not found: Table %s:%s.%s@%d", projectID, datasetID, tableID, epochMs)
}
// ParseDecorator splits "table@epoch" at the last '@' into the base table
// id and an epoch in milliseconds.
//
// A suffix that starts with '-' is a relative decorator: it is added to
// the current UTC time in milliseconds. Any other integer suffix is taken
// as an absolute epoch. When there is no '@', the '@' is the first or last
// character, or the suffix is not a base-10 int64, the input is returned
// unchanged with decorated == false.
func ParseDecorator(tableID string) (base string, epochMs int64, decorated bool) {
	sep := strings.LastIndex(tableID, "@")
	if sep <= 0 || sep == len(tableID)-1 {
		return tableID, 0, false
	}

	// suffix is non-empty here because '@' is not the last character.
	suffix := tableID[sep+1:]
	value, err := strconv.ParseInt(suffix, 10, 64)
	if err != nil {
		return tableID, 0, false
	}
	if suffix[0] == '-' {
		value += time.Now().UTC().UnixMilli()
	}
	return tableID[:sep], value, true
}
// listAllRows pages through every row of ref via catalog.ListRows,
// requesting defaultPageSize rows per call, and returns them in order.
// Paging stops at the first empty page or once the running offset reaches
// the TotalRows reported by the latest response. The first ListRows error
// aborts the scan and discards the rows gathered so far.
//
// schema is accepted but not consulted.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient,
	ref *enginepb.TableRef, schema *enginepb.TableSchema,
) ([]*enginepb.DataRow, error) {
	_ = schema

	var collected []*enginepb.DataRow
	for offset := int64(0); ; {
		page, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
			Table:      ref,
			StartIndex: offset,
			MaxResults: defaultPageSize,
		})
		if err != nil {
			return nil, fmt.Errorf("list rows for snapshot: %w", err)
		}

		batch := page.GetRows()
		if len(batch) == 0 {
			return collected, nil
		}
		collected = append(collected, batch...)

		offset += int64(len(batch))
		if offset >= page.GetTotalRows() {
			return collected, nil
		}
	}
}
package sqltools
import (
"net"
"net/http"
"strings"
)
// AccessConfig captures loopback / token gates for the SQL tools routes.
type AccessConfig struct {
// AllowRemote, when false, makes CheckAccess reject requests whose
// RemoteAddr is not a loopback address.
AllowRemote bool
// Token, when non-empty, must equal the value of the request's
// HeaderName header for CheckAccess to succeed.
Token string
}
// HeaderName is the canonical token header for remote SQL tools access.
// CheckAccess compares its value against AccessConfig.Token.
const HeaderName = "X-BigQuery-Emulator-SqlTools-Token"
// CheckAccess applies the configured gates to r and returns
// ErrAccessDenied when either one fails:
//
//   - unless AllowRemote is set, r.RemoteAddr must be a loopback address;
//   - when Token is non-empty, the HeaderName header must equal it.
//
// Both gates apply independently, so a remote-allowed config with a token
// still requires the token from loopback callers.
func (c AccessConfig) CheckAccess(r *http.Request) error {
	if !c.AllowRemote && !isLoopback(r.RemoteAddr) {
		return ErrAccessDenied
	}
	if c.Token == "" {
		return nil
	}
	if presented := r.Header.Get(HeaderName); !secureEqual(presented, c.Token) {
		return ErrAccessDenied
	}
	return nil
}
// ErrAccessDenied is returned when access checks fail. It is an httpError
// whose Status method reports http.StatusForbidden.
var ErrAccessDenied = httpError{code: http.StatusForbidden, msg: "sqltools: access denied"}
// httpError is an error value that also carries the HTTP status code to
// respond with.
type httpError struct {
// code is the HTTP status returned by Status.
code int
// msg is the text returned by Error.
msg string
}
// Error implements the error interface by returning the stored message.
func (e httpError) Error() string { return e.msg }
// Status returns the HTTP status for this error.
func (e httpError) Status() int { return e.code }
// secureEqual reports whether a and b are byte-for-byte equal. Strings of
// different length are rejected immediately; for equal lengths every byte
// pair is examined and the differences are OR-ed together, so the loop
// does not stop at the first mismatch.
func secureEqual(a, b string) bool {
	if len(a) != len(b) {
		return false
	}
	var acc byte
	for i := 0; i < len(a); i++ {
		acc |= a[i] ^ b[i]
	}
	return acc == 0
}
// isLoopback reports whether remoteAddr names a loopback IP address.
//
// remoteAddr may be "host:port" or a bare host; when it cannot be split
// the whole string is used as the host. The host is trimmed of
// surrounding whitespace. An empty host is treated as loopback; a host
// that is not a literal IP (for example a DNS name) is not.
func isLoopback(remoteAddr string) bool {
	host := remoteAddr
	if h, _, err := net.SplitHostPort(remoteAddr); err == nil {
		host = h
	}
	host = strings.TrimSpace(host)
	if host == "" {
		return true
	}
	if ip := net.ParseIP(host); ip != nil {
		return ip.IsLoopback()
	}
	return false
}
package sqltools
import (
"encoding/json"
"io"
"net/http"
"github.com/vantaboard/bigquery-emulator/gateway/engine"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
const (
// statusInvalid is the Status value placed in the errEnvelope responses
// written by the helpers and handlers in this file.
statusInvalid = "invalid"
// errSQLRequired is the message sent with HTTP 400 when a request's sql
// field is empty on a route that requires it.
errSQLRequired = "sql is required"
)
// HandlerDeps bundles dependencies for SQL tools HTTP handlers.
type HandlerDeps struct {
// Access is checked at the start of every handler via CheckAccess.
Access AccessConfig
// Client is the engine client whose SQLTools field serves the requests.
// Handlers that need it answer 503 when Client or Client.SQLTools is nil.
Client *engine.Client
}
// RegisterRoutes installs the SQL tools handlers on mux under
// /api/emulator/sql/*: a GET route for capabilities and POST routes for
// format, parse, tokenize, complete and analyze.
func RegisterRoutes(mux *http.ServeMux, deps HandlerDeps) {
	routes := []struct {
		pattern string
		handler http.HandlerFunc
	}{
		{"GET /api/emulator/sql/capabilities", deps.handleCapabilities},
		{"POST /api/emulator/sql/format", deps.handleFormat},
		{"POST /api/emulator/sql/parse", deps.handleParse},
		{"POST /api/emulator/sql/tokenize", deps.handleTokenize},
		{"POST /api/emulator/sql/complete", deps.handleComplete},
		{"POST /api/emulator/sql/analyze", deps.handleAnalyze},
	}
	for _, rt := range routes {
		mux.HandleFunc(rt.pattern, rt.handler)
	}
}
// errEnvelope is the JSON body written for every error response in this
// file.
type errEnvelope struct {
// Code repeats the HTTP status code of the response.
Code int `json:"code"`
// Status is a short status label; the callers in this file always pass
// statusInvalid.
Status string `json:"status"`
// Message is the human-readable error text.
Message string `json:"message"`
}
// writeJSON sends v as a JSON response with the given status code. The
// Content-Type header is set before the status line is written. An
// encoding failure is ignored.
func writeJSON(w http.ResponseWriter, code int, v any) {
	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	w.WriteHeader(code)

	enc := json.NewEncoder(w)
	_ = enc.Encode(v) // status line already written; nothing to report to
}
// writeAccessError reports a failed access check as an errEnvelope. The
// HTTP status comes from err's Status() int method when it has one, and
// defaults to 403 Forbidden otherwise.
func writeAccessError(w http.ResponseWriter, err error) {
	code := http.StatusForbidden
	if coded, ok := err.(interface{ Status() int }); ok {
		code = coded.Status()
	}
	writeJSON(w, code, errEnvelope{
		Code:    code,
		Status:  statusInvalid,
		Message: err.Error(),
	})
}
// writeGrpcError translates an engine call error into an errEnvelope.
//
// Errors that status.FromError does not recognise become a 500 carrying
// err.Error(). Recognised statuses carry the status message and map as
// InvalidArgument -> 400, NotFound -> 404, FailedPrecondition -> 412,
// Unimplemented -> 501, anything else -> 500.
func writeGrpcError(w http.ResponseWriter, err error) {
	st, isStatus := status.FromError(err)
	if !isStatus {
		writeJSON(w, http.StatusInternalServerError, errEnvelope{
			Code:    http.StatusInternalServerError,
			Status:  statusInvalid,
			Message: err.Error(),
		})
		return
	}

	mapped := map[codes.Code]int{
		codes.InvalidArgument:    http.StatusBadRequest,
		codes.NotFound:           http.StatusNotFound,
		codes.FailedPrecondition: http.StatusPreconditionFailed,
		codes.Unimplemented:      http.StatusNotImplemented,
	}
	httpCode, known := mapped[st.Code()]
	if !known {
		httpCode = http.StatusInternalServerError
	}
	writeJSON(w, httpCode, errEnvelope{
		Code:    httpCode,
		Status:  statusInvalid,
		Message: st.Message(),
	})
}
// requireClient reports whether the engine SQL tools client is available.
// When Client or Client.SQLTools is nil it writes a 503 errEnvelope to w
// and returns false, so the caller should simply return.
func (d HandlerDeps) requireClient(w http.ResponseWriter) bool {
	if d.Client != nil && d.Client.SQLTools != nil {
		return true
	}
	writeJSON(w, http.StatusServiceUnavailable, errEnvelope{
		Code:    http.StatusServiceUnavailable,
		Status:  statusInvalid,
		Message: "sql tools engine client is not configured",
	})
	return false
}
// readBody reads the whole request body. On a read failure it writes a
// 400 errEnvelope to w and returns (nil, false), so the caller should
// simply return.
func (d HandlerDeps) readBody(w http.ResponseWriter, r *http.Request) ([]byte, bool) {
	payload, readErr := io.ReadAll(r.Body)
	if readErr == nil {
		return payload, true
	}
	writeJSON(w, http.StatusBadRequest, errEnvelope{
		Code:    http.StatusBadRequest,
		Status:  statusInvalid,
		Message: "Could not read request body: " + readErr.Error(),
	})
	return nil, false
}
// formatRequest is the JSON body accepted by handleFormat.
type formatRequest struct {
// offsetRequest supplies the optional offsetUnit field used when
// converting diagnostics.
offsetRequest
// SQL is the text to format; handleFormat rejects an empty value.
SQL string `json:"sql"`
// Strict, LineLengthLimit and IndentationSpaces are forwarded unchanged
// to the engine's FormatSqlRequest.
Strict bool `json:"strict"`
LineLengthLimit int32 `json:"lineLengthLimit"`
IndentationSpaces int32 `json:"indentationSpaces"`
}
// formatResponse is the JSON body written by handleFormat on success.
type formatResponse struct {
// FormattedSQL is the formatted text returned by the engine.
FormattedSQL string `json:"formattedSql"`
// Diagnostics holds the engine's diagnostics; omitted when empty.
Diagnostics []diagnosticWire `json:"diagnostics,omitempty"`
}
// handleFormat serves POST /api/emulator/sql/format.
//
// It runs the access check, requires a configured engine client, decodes
// a formatRequest and answers 400 for malformed JSON or an empty sql
// field. The request is then forwarded to the engine's Format call;
// engine errors go through writeGrpcError. On success it writes a
// formatResponse with the formatted SQL and converted diagnostics.
func (d HandlerDeps) handleFormat(w http.ResponseWriter, r *http.Request) {
	if err := d.Access.CheckAccess(r); err != nil {
		writeAccessError(w, err)
		return
	}
	if !d.requireClient(w) {
		return
	}
	raw, ok := d.readBody(w, r)
	if !ok {
		return
	}

	// reject writes the 400 envelope shared by both validation failures.
	reject := func(msg string) {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code:    http.StatusBadRequest,
			Status:  statusInvalid,
			Message: msg,
		})
	}

	var in formatRequest
	if err := json.Unmarshal(raw, &in); err != nil {
		reject("invalid JSON: " + err.Error())
		return
	}
	if in.SQL == "" {
		reject(errSQLRequired)
		return
	}

	engineReq := &enginepb.FormatSqlRequest{
		Sql:               in.SQL,
		Strict:            in.Strict,
		LineLengthLimit:   in.LineLengthLimit,
		IndentationSpaces: in.IndentationSpaces,
	}
	reply, err := d.Client.SQLTools.Format(r.Context(), engineReq)
	if err != nil {
		writeGrpcError(w, err)
		return
	}

	result := formatResponse{FormattedSQL: reply.GetFormattedSql()}
	for _, pd := range reply.GetDiagnostics() {
		result.Diagnostics = append(result.Diagnostics,
			diagnosticFromProto(in.SQL, in.OffsetUnit, pd))
	}
	writeJSON(w, http.StatusOK, result)
}
// parseRequest is the JSON body accepted by handleParse.
type parseRequest struct {
// offsetRequest supplies the optional offsetUnit field used when
// converting diagnostics.
offsetRequest
// SQL is the text to parse; handleParse rejects an empty value.
SQL string `json:"sql"`
}
// parseResponse is the JSON body written by handleParse on success.
type parseResponse struct {
// StatementKinds is taken from the engine's parse response.
StatementKinds []string `json:"statementKinds"`
// Diagnostics holds the engine's diagnostics; omitted when empty.
Diagnostics []diagnosticWire `json:"diagnostics,omitempty"`
}
// handleParse serves POST /api/emulator/sql/parse.
//
// It runs the access check, requires a configured engine client, decodes
// a parseRequest and answers 400 for malformed JSON or an empty sql
// field. The SQL is then forwarded to the engine's Parse call; engine
// errors go through writeGrpcError. On success it writes a parseResponse
// with the statement kinds and converted diagnostics.
//
// statementKinds is always encoded as a JSON array: when the engine
// reports no kinds the response carries [] rather than null.
func (d HandlerDeps) handleParse(w http.ResponseWriter, r *http.Request) {
	if err := d.Access.CheckAccess(r); err != nil {
		writeAccessError(w, err)
		return
	}
	if !d.requireClient(w) {
		return
	}
	body, ok := d.readBody(w, r)
	if !ok {
		return
	}
	var req parseRequest
	if err := json.Unmarshal(body, &req); err != nil {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: "invalid JSON: " + err.Error(),
		})
		return
	}
	if req.SQL == "" {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: errSQLRequired,
		})
		return
	}
	resp, err := d.Client.SQLTools.Parse(r.Context(), &enginepb.ParseSqlRequest{Sql: req.SQL})
	if err != nil {
		writeGrpcError(w, err)
		return
	}

	kinds := resp.GetStatementKinds()
	if kinds == nil {
		// The field has no omitempty tag, so a nil slice would be encoded
		// as JSON null; clients expect an array.
		kinds = []string{}
	}
	out := parseResponse{StatementKinds: kinds}
	for _, diag := range resp.GetDiagnostics() {
		out.Diagnostics = append(out.Diagnostics,
			diagnosticFromProto(req.SQL, req.OffsetUnit, diag))
	}
	writeJSON(w, http.StatusOK, out)
}
// tokenizeRequest is the JSON body accepted by handleTokenize.
type tokenizeRequest struct {
// offsetRequest supplies the optional offsetUnit field used when
// converting token and diagnostic offsets.
offsetRequest
// SQL is the text to tokenize. handleTokenize does not reject an empty
// value; it is forwarded to the engine as is.
SQL string `json:"sql"`
// IncludeComments is forwarded unchanged to the engine's
// TokenizeSqlRequest.
IncludeComments bool `json:"includeComments"`
}
// tokenizeResponse is the JSON body written by handleTokenize on success.
type tokenizeResponse struct {
// Tokens holds one tokenWire per token returned by the engine.
Tokens []tokenWire `json:"tokens"`
// Diagnostics holds the engine's diagnostics; omitted when empty.
Diagnostics []diagnosticWire `json:"diagnostics,omitempty"`
}
// handleTokenize serves POST /api/emulator/sql/tokenize.
//
// It runs the access check, requires a configured engine client, decodes
// a tokenizeRequest and answers 400 for malformed JSON. Unlike the other
// POST handlers it does not reject an empty sql field. The request is
// forwarded to the engine's Tokenize call; engine errors go through
// writeGrpcError. On success it writes a tokenizeResponse with converted
// tokens and diagnostics.
//
// tokens is always encoded as a JSON array: when the engine returns no
// tokens the response carries [] rather than null.
func (d HandlerDeps) handleTokenize(w http.ResponseWriter, r *http.Request) {
	if err := d.Access.CheckAccess(r); err != nil {
		writeAccessError(w, err)
		return
	}
	if !d.requireClient(w) {
		return
	}
	body, ok := d.readBody(w, r)
	if !ok {
		return
	}
	var req tokenizeRequest
	if err := json.Unmarshal(body, &req); err != nil {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: "invalid JSON: " + err.Error(),
		})
		return
	}
	resp, err := d.Client.SQLTools.Tokenize(r.Context(), &enginepb.TokenizeSqlRequest{
		Sql: req.SQL, IncludeComments: req.IncludeComments,
	})
	if err != nil {
		writeGrpcError(w, err)
		return
	}

	tokens := resp.GetTokens()
	// Start from a non-nil slice: the field has no omitempty tag, so a nil
	// slice would be encoded as JSON null instead of an empty array.
	out := tokenizeResponse{Tokens: make([]tokenWire, 0, len(tokens))}
	for _, tok := range tokens {
		out.Tokens = append(out.Tokens, tokenFromProto(req.SQL, req.OffsetUnit, tok))
	}
	for _, diag := range resp.GetDiagnostics() {
		out.Diagnostics = append(out.Diagnostics,
			diagnosticFromProto(req.SQL, req.OffsetUnit, diag))
	}
	writeJSON(w, http.StatusOK, out)
}
// completeRequest is the JSON body accepted by handleComplete.
type completeRequest struct {
// offsetRequest supplies the optional offsetUnit field that decides how
// CursorByteOffset is interpreted.
offsetRequest
// ProjectID is required; handleComplete rejects an empty value.
ProjectID string `json:"projectId"`
// DefaultDatasetID is forwarded unchanged to the engine.
DefaultDatasetID string `json:"defaultDatasetId"`
// SQL is the text being completed.
SQL string `json:"sql"`
// CursorByteOffset is the cursor position within SQL. It is passed
// through convertCursorToUTF8, which treats it as a UTF-16 code-unit
// offset when offsetUnit is "utf16" and as a byte offset otherwise.
CursorByteOffset int32 `json:"cursorByteOffset"`
}
// candidateWire is the JSON form of one completion candidate. Every field
// is copied from the same-named getter of the engine's candidate message.
type candidateWire struct {
Label string `json:"label"`
Kind string `json:"kind"`
InsertText string `json:"insertText"`
// Detail and Fqn are omitted from the JSON when empty.
Detail string `json:"detail,omitempty"`
Fqn string `json:"fqn,omitempty"`
}
// completeResponse is the JSON body written by handleComplete on success.
type completeResponse struct {
// Candidates holds one entry per candidate returned by the engine.
Candidates []candidateWire `json:"candidates"`
// ReplacementStart and ReplacementEnd are the engine's replacement
// offsets after convertReplacementFromUTF8, i.e. in the unit the request
// asked for.
ReplacementStart int32 `json:"replacementStart"`
ReplacementEnd int32 `json:"replacementEnd"`
}
// handleComplete serves POST /api/emulator/sql/complete.
//
// It runs the access check, requires a configured engine client, decodes
// a completeRequest and answers 400 for malformed JSON or an empty
// projectId. The cursor is converted to a UTF-8 byte offset before the
// engine's Complete call, and the replacement range in the reply is
// converted back to the requested offset unit. Engine errors go through
// writeGrpcError.
//
// candidates is always encoded as a JSON array: when the engine returns
// no candidates the response carries [] rather than null.
func (d HandlerDeps) handleComplete(w http.ResponseWriter, r *http.Request) {
	if err := d.Access.CheckAccess(r); err != nil {
		writeAccessError(w, err)
		return
	}
	if !d.requireClient(w) {
		return
	}
	body, ok := d.readBody(w, r)
	if !ok {
		return
	}
	var req completeRequest
	if err := json.Unmarshal(body, &req); err != nil {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: "invalid JSON: " + err.Error(),
		})
		return
	}
	if req.ProjectID == "" {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: "projectId is required",
		})
		return
	}
	cursor := convertCursorToUTF8(req.SQL, req.OffsetUnit, req.CursorByteOffset)
	resp, err := d.Client.SQLTools.Complete(r.Context(), &enginepb.CompleteSqlRequest{
		ProjectId:        req.ProjectID,
		DefaultDatasetId: req.DefaultDatasetID,
		Sql:              req.SQL,
		CursorByteOffset: cursor,
	})
	if err != nil {
		writeGrpcError(w, err)
		return
	}
	replStart, replEnd := convertReplacementFromUTF8(
		req.SQL, req.OffsetUnit, resp.GetReplacementStart(), resp.GetReplacementEnd())

	candidates := resp.GetCandidates()
	out := completeResponse{
		// Non-nil even when empty: the field has no omitempty tag, so a nil
		// slice would be encoded as JSON null instead of an empty array.
		Candidates:       make([]candidateWire, 0, len(candidates)),
		ReplacementStart: replStart,
		ReplacementEnd:   replEnd,
	}
	for _, c := range candidates {
		out.Candidates = append(out.Candidates, candidateWire{
			Label: c.GetLabel(), Kind: c.GetKind(), InsertText: c.GetInsertText(),
			Detail: c.GetDetail(), Fqn: c.GetFqn(),
		})
	}
	writeJSON(w, http.StatusOK, out)
}
// capabilitiesResponse is the JSON body written by handleCapabilities.
type capabilitiesResponse struct {
// SQLTools is always true in the response handleCapabilities builds.
SQLTools bool `json:"sqlTools"`
// Version is set to sqlToolsVersion.
Version string `json:"version"`
// Endpoints lists the endpoint names the handler advertises.
Endpoints []string `json:"endpoints"`
// OffsetUnits lists the accepted offsetUnit values.
OffsetUnits []string `json:"offsetUnits"`
}
// handleCapabilities serves GET /api/emulator/sql/capabilities. After the
// access check it writes a fixed capabilitiesResponse: the SQL tools
// version, the advertised endpoint names and the supported offset units.
// It does not need the engine client.
func (d HandlerDeps) handleCapabilities(w http.ResponseWriter, r *http.Request) {
	if err := d.Access.CheckAccess(r); err != nil {
		writeAccessError(w, err)
		return
	}

	var caps capabilitiesResponse
	caps.SQLTools = true
	caps.Version = sqlToolsVersion
	caps.Endpoints = []string{
		"format", "parse", "tokenize", "complete", "analyze", "capabilities",
	}
	caps.OffsetUnits = []string{offsetUnitUTF8, offsetUnitUTF16}
	writeJSON(w, http.StatusOK, caps)
}
// analyzeRequest is the JSON body accepted by handleAnalyze.
type analyzeRequest struct {
// offsetRequest supplies the optional offsetUnit field used when
// converting diagnostics.
offsetRequest
// ProjectID is required; handleAnalyze rejects an empty value.
ProjectID string `json:"projectId"`
// DefaultDatasetID is forwarded unchanged to the engine.
DefaultDatasetID string `json:"defaultDatasetId"`
// SQL is the text to analyze; handleAnalyze rejects an empty value.
SQL string `json:"sql"`
}
// referencedTableWire is the JSON form of one table referenced by the
// analyzed SQL. Every field is copied from the same-named getter of the
// engine's referenced-table message.
type referencedTableWire struct {
ProjectID string `json:"projectId"`
DatasetID string `json:"datasetId"`
TableID string `json:"tableId"`
// Alias is omitted from the JSON when empty.
Alias string `json:"alias,omitempty"`
Kind string `json:"kind"`
}
// analyzeResponse is the JSON body written by handleAnalyze on success.
type analyzeResponse struct {
// ReferencedTables holds one entry per table the engine reported.
ReferencedTables []referencedTableWire `json:"referencedTables"`
// StatementKinds is taken from the engine's analyze response.
StatementKinds []string `json:"statementKinds"`
// Diagnostics holds the engine's diagnostics; omitted when empty.
Diagnostics []diagnosticWire `json:"diagnostics,omitempty"`
}
// handleAnalyze serves POST /api/emulator/sql/analyze.
//
// It runs the access check, requires a configured engine client, decodes
// an analyzeRequest and answers 400 for malformed JSON, an empty
// projectId or an empty sql field. The request is forwarded to the
// engine's Analyze call; engine errors go through writeGrpcError. On
// success it writes an analyzeResponse with statement kinds, converted
// diagnostics and the referenced tables.
//
// referencedTables and statementKinds are always encoded as JSON arrays:
// when the engine reports none the response carries [] rather than null.
func (d HandlerDeps) handleAnalyze(w http.ResponseWriter, r *http.Request) {
	if err := d.Access.CheckAccess(r); err != nil {
		writeAccessError(w, err)
		return
	}
	if !d.requireClient(w) {
		return
	}
	body, ok := d.readBody(w, r)
	if !ok {
		return
	}
	var req analyzeRequest
	if err := json.Unmarshal(body, &req); err != nil {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: "invalid JSON: " + err.Error(),
		})
		return
	}
	if req.ProjectID == "" {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: "projectId is required",
		})
		return
	}
	if req.SQL == "" {
		writeJSON(w, http.StatusBadRequest, errEnvelope{
			Code: http.StatusBadRequest, Status: statusInvalid,
			Message: errSQLRequired,
		})
		return
	}
	resp, err := d.Client.SQLTools.Analyze(r.Context(), &enginepb.AnalyzeSqlRequest{
		ProjectId:        req.ProjectID,
		DefaultDatasetId: req.DefaultDatasetID,
		Sql:              req.SQL,
	})
	if err != nil {
		writeGrpcError(w, err)
		return
	}

	// Neither array field has an omitempty tag, so nil slices would be
	// encoded as JSON null; keep both non-nil.
	kinds := resp.GetStatementKinds()
	if kinds == nil {
		kinds = []string{}
	}
	tables := resp.GetReferencedTables()
	out := analyzeResponse{
		StatementKinds:   kinds,
		ReferencedTables: make([]referencedTableWire, 0, len(tables)),
	}
	for _, diag := range resp.GetDiagnostics() {
		out.Diagnostics = append(out.Diagnostics,
			diagnosticFromProto(req.SQL, req.OffsetUnit, diag))
	}
	for _, table := range tables {
		out.ReferencedTables = append(out.ReferencedTables, referencedTableWire{
			ProjectID: table.GetProjectId(),
			DatasetID: table.GetDatasetId(),
			TableID:   table.GetTableId(),
			Alias:     table.GetAlias(),
			Kind:      table.GetKind(),
		})
	}
	writeJSON(w, http.StatusOK, out)
}
package sqltools
import (
"math"
"strings"
"unicode/utf16"
"github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)
const (
// offsetUnitUTF8 and offsetUnitUTF16 are the two offsetUnit values;
// normalizeOffsetUnit maps anything other than "utf16" to "utf8".
offsetUnitUTF8 = "utf8"
offsetUnitUTF16 = "utf16"
// sqlToolsVersion is the version string reported by handleCapabilities.
sqlToolsVersion = "1.0"
)
// normalizeOffsetUnit canonicalises a client-supplied offset unit. After
// trimming whitespace and lower-casing, "utf16" selects offsetUnitUTF16;
// every other value, including the empty string, selects offsetUnitUTF8.
func normalizeOffsetUnit(unit string) string {
	if strings.ToLower(strings.TrimSpace(unit)) == offsetUnitUTF16 {
		return offsetUnitUTF16
	}
	return offsetUnitUTF8
}
// utf8ByteOffsetToCodeUnit converts a UTF-8 byte offset into sql to the
// number of UTF-16 code units needed to encode sql[:byteOffset]. Offsets
// at or below zero yield 0, and offsets beyond the end of sql are clamped
// to len(sql).
func utf8ByteOffsetToCodeUnit(sql string, byteOffset int) int {
	if byteOffset <= 0 {
		return 0
	}
	end := byteOffset
	if end > len(sql) {
		end = len(sql)
	}
	prefix := []rune(sql[:end])
	return len(utf16.Encode(prefix))
}
// codeUnitToUtf8ByteOffset converts a UTF-16 code-unit offset into sql to
// a UTF-8 byte length. It encodes sql as UTF-16, keeps the first codeUnit
// units (clamped to the encoded length), decodes them again and returns
// the byte length of the result. Offsets at or below zero yield 0.
func codeUnitToUtf8ByteOffset(sql string, codeUnit int) int {
	if codeUnit <= 0 {
		return 0
	}
	encoded := utf16.Encode([]rune(sql))
	limit := codeUnit
	if limit > len(encoded) {
		limit = len(encoded)
	}
	if limit == 0 {
		return 0
	}
	prefix := utf16.Decode(encoded[:limit])
	return len(string(prefix))
}
// int32FromInt narrows v to int32, saturating at math.MaxInt32 and
// math.MinInt32 instead of wrapping around.
func int32FromInt(v int) int32 {
	switch {
	case v > int(math.MaxInt32):
		return math.MaxInt32
	case v < int(math.MinInt32):
		return math.MinInt32
	default:
		return int32(v)
	}
}
// int32Ptr returns a pointer to a copy of v, or nil when v is negative.
// Negative values therefore become "absent" in pointer-typed fields.
func int32Ptr(v int32) *int32 {
	if v >= 0 {
		return &v
	}
	return nil
}
// convertCursorToUTF8 returns the cursor position as a UTF-8 byte offset
// into sql. When unit normalises to "utf16" the cursor is treated as a
// UTF-16 code-unit offset and converted; otherwise it is returned as is.
func convertCursorToUTF8(sql string, unit string, cursor int32) int32 {
	if normalizeOffsetUnit(unit) == offsetUnitUTF16 {
		byteOffset := codeUnitToUtf8ByteOffset(sql, int(cursor))
		return int32FromInt(byteOffset)
	}
	return cursor
}
// convertReplacementFromUTF8 expresses a replacement range, given as
// UTF-8 byte offsets into sql, in the caller's offset unit. When unit
// normalises to "utf16" both ends are converted to UTF-16 code-unit
// offsets; otherwise they are returned unchanged.
func convertReplacementFromUTF8(sql string, unit string, start, end int32) (int32, int32) {
	if normalizeOffsetUnit(unit) == offsetUnitUTF16 {
		startUnits := utf8ByteOffsetToCodeUnit(sql, int(start))
		endUnits := utf8ByteOffsetToCodeUnit(sql, int(end))
		return int32FromInt(startUnits), int32FromInt(endUnits)
	}
	return start, end
}
// diagnosticWire is the JSON form of one engine diagnostic, as built by
// diagnosticFromProto.
type diagnosticWire struct {
// Line, Column, Message and Severity are copied from the engine message.
Line int32 `json:"line"`
Column int32 `json:"column"`
Message string `json:"message"`
Severity string `json:"severity"`
// EndLine and EndColumn are omitted from the JSON when zero.
EndLine int32 `json:"endLine,omitempty"`
EndColumn int32 `json:"endColumn,omitempty"`
// StartByte and EndByte are nil (and omitted) when the engine reported a
// negative value.
StartByte *int32 `json:"startByte,omitempty"`
EndByte *int32 `json:"endByte,omitempty"`
// StartUTF16 and EndUTF16 are only set when the request asked for the
// utf16 offset unit, the SQL is non-empty and the matching byte offset is
// present.
StartUTF16 *int32 `json:"startUtf16,omitempty"`
EndUTF16 *int32 `json:"endUtf16,omitempty"`
}
// diagnosticFromProto converts an engine diagnostic into its JSON form.
//
// Position and message fields are copied as is. Byte offsets become
// optional: a negative value from the engine is dropped. When unit
// normalises to "utf16" and sql is non-empty, each byte offset that is
// present is also reported as a UTF-16 code-unit offset into sql.
func diagnosticFromProto(sql string, unit string, diag *enginepb.SqlDiagnostic) diagnosticWire {
	wire := diagnosticWire{
		Line:      diag.GetLine(),
		Column:    diag.GetColumn(),
		Message:   diag.GetMessage(),
		Severity:  diag.GetSeverity(),
		EndLine:   diag.GetEndLine(),
		EndColumn: diag.GetEndColumn(),
		StartByte: int32Ptr(diag.GetStartByte()),
		EndByte:   int32Ptr(diag.GetEndByte()),
	}
	if sql == "" || normalizeOffsetUnit(unit) != offsetUnitUTF16 {
		return wire
	}

	// toUTF16 maps an optional byte offset to an optional code-unit offset.
	toUTF16 := func(byteOffset *int32) *int32 {
		if byteOffset == nil {
			return nil
		}
		units := utf8ByteOffsetToCodeUnit(sql, int(*byteOffset))
		return int32Ptr(int32FromInt(units))
	}
	wire.StartUTF16 = toUTF16(wire.StartByte)
	wire.EndUTF16 = toUTF16(wire.EndByte)
	return wire
}
// tokenWire is the JSON form of one engine token, as built by
// tokenFromProto.
type tokenWire struct {
// Kind, Image, StartByte and EndByte are copied from the engine token.
Kind string `json:"kind"`
Image string `json:"image"`
StartByte int32 `json:"startByte"`
EndByte int32 `json:"endByte"`
// StartUTF16 and EndUTF16 are only set when the request asked for the
// utf16 offset unit and the SQL is non-empty; otherwise they are omitted.
StartUTF16 *int32 `json:"startUtf16,omitempty"`
EndUTF16 *int32 `json:"endUtf16,omitempty"`
}
// tokenFromProto converts an engine token into its JSON form. Kind, image
// and byte offsets are copied as is. When unit normalises to "utf16" and
// sql is non-empty, both byte offsets are additionally reported as UTF-16
// code-unit offsets into sql.
func tokenFromProto(sql string, unit string, tok *enginepb.SqlToken) tokenWire {
	startByte, endByte := tok.GetStartByte(), tok.GetEndByte()
	wire := tokenWire{
		Kind:      tok.GetKind(),
		Image:     tok.GetImage(),
		StartByte: startByte,
		EndByte:   endByte,
	}
	if sql == "" || normalizeOffsetUnit(unit) != offsetUnitUTF16 {
		return wire
	}
	startUnits := utf8ByteOffsetToCodeUnit(sql, int(startByte))
	endUnits := utf8ByteOffsetToCodeUnit(sql, int(endByte))
	wire.StartUTF16 = int32Ptr(int32FromInt(startUnits))
	wire.EndUTF16 = int32Ptr(int32FromInt(endUnits))
	return wire
}
// offsetRequest carries the optional offsetUnit request field. It is
// embedded in the request types so they all accept the same JSON key.
type offsetRequest struct {
// OffsetUnit is interpreted through normalizeOffsetUnit: "utf16"
// (case-insensitive, surrounding whitespace ignored) selects UTF-16
// offsets, anything else selects UTF-8 byte offsets.
OffsetUnit string `json:"offsetUnit"`
}
// Package storagetmpl materializes an initial-data template tree into
// the persistent storage root the engine reads on startup.
//
// An operator (or a
// container image) ships a pre-populated catalog plus row files, and
// the emulator should copy them into the runtime data-dir on first
// boot so a downstream client sees a populated catalog at t=0. Once
// the data-dir is initialized, subsequent boots are no-ops -- the
// template is only ever copied into an empty (or absent) destination
// so we never clobber writes a previous run committed.
//
// "Initialized" detection. This repo's engine uses DuckDB, which
// keeps its catalog in a single file named `catalog.duckdb` plus
// sidecar `.parquet` / `.meta.json` files (see
// backend/storage/duckdb/duckdb_storage.cc). The presence of
// `catalog.duckdb` is therefore our sentinel: when it already
// exists in the destination we leave the tree alone and assume a
// prior run owns it. When the destination is absent OR exists but
// does not yet contain a `catalog.duckdb` file, we treat it as a
// fresh data-dir and copy the entire template tree in.
package storagetmpl
import (
"errors"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
)
// catalogSentinel is the file whose presence in the destination
// data-dir means the engine has already initialized this catalog.
// Declared as an unexported package-level var (rather than a const) so
// in-package tests can swap it for a storage-backend-specific sentinel
// without rewriting the helper.
var catalogSentinel = "catalog.duckdb"
// MaybeMaterialize copies `template` into `dataDir` when `dataDir`
// does not yet contain the engine's initialized-catalog sentinel.
//
// Returns nil (no-op) when:
// - template is "" (operator did not configure an initial-data-dir).
// - dataDir is "" (the engine will use its in-memory fallback; there
// is no on-disk tree to seed).
// - dataDir already contains the sentinel file (a previous run owns
// the catalog; copying would clobber writes).
//
// Returns an error when:
// - template does not exist, is not a directory, or is unreadable.
// - dataDir exists but is not a directory.
// - any file copy fails (disk full, permission denied, ...).
//
// The copy preserves file mode bits but does NOT preserve
// ownership/atime/mtime; the engine does not depend on either, and
// `os.Chown` would require CAP_CHOWN for cross-uid operator
// scenarios that are out of scope here.
func MaybeMaterialize(template, dataDir string) error {
	// Nothing configured on either side: there is no tree to seed.
	if template == "" || dataDir == "" {
		return nil
	}

	tplInfo, err := os.Stat(template)
	if err != nil {
		return fmt.Errorf("storagetmpl: stat template %q: %w", template, err)
	}
	if !tplInfo.IsDir() {
		return fmt.Errorf("storagetmpl: template %q is not a directory", template)
	}

	dstInfo, statErr := os.Stat(dataDir)
	if statErr == nil {
		// The destination exists: it must be a directory, and one that
		// already holds the sentinel belongs to a previous run.
		if !dstInfo.IsDir() {
			return fmt.Errorf("storagetmpl: data-dir %q exists but is not a directory", dataDir)
		}
		if isInitialized(dataDir) {
			return nil
		}
	} else if errors.Is(statErr, fs.ErrNotExist) {
		// The destination is absent: create it before copying into it.
		if mkErr := os.MkdirAll(dataDir, 0o750); mkErr != nil {
			return fmt.Errorf("storagetmpl: create data-dir %q: %w", dataDir, mkErr)
		}
	} else {
		return fmt.Errorf("storagetmpl: stat data-dir %q: %w", dataDir, statErr)
	}

	copyErr := copyTree(template, dataDir)
	if copyErr != nil {
		return fmt.Errorf("storagetmpl: copy %q -> %q: %w", template, dataDir, copyErr)
	}
	return nil
}
// isInitialized reports whether dataDir holds the catalog sentinel as a
// non-directory entry. Any stat failure counts as "not initialized", so
// a permission problem lets the copy proceed and fail with a concrete
// error instead of being skipped silently.
func isInitialized(dataDir string) bool {
	sentinel := filepath.Join(dataDir, catalogSentinel)
	info, err := os.Stat(sentinel)
	if err != nil {
		return false
	}
	return !info.IsDir()
}
// copyTree mirrors the tree rooted at src under dst.
//
// Directories are created with the source directory's permission bits.
// Regular files are copied with copyFile, overwriting any existing
// destination file. A symlink is never recreated as a link: its target
// is resolved and, if it is a file, its content is copied; if it is a
// directory, only the directory itself is created here. The first error
// from the walk or from any copy stops the walk and is returned.
//
// Callers gate this function on isInitialized, so an operator has to
// remove the sentinel file explicitly to reseed a data-dir.
func copyTree(src, dst string) error {
	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		rel, relErr := filepath.Rel(src, path)
		if relErr != nil {
			return relErr
		}
		target := filepath.Join(dst, rel)

		switch {
		case entry.IsDir():
			info, err := entry.Info()
			if err != nil {
				return err
			}
			return os.MkdirAll(target, info.Mode().Perm())

		case entry.Type()&fs.ModeSymlink != 0:
			// Copy what the link points at instead of the link itself, so
			// nothing under dst can point outside of dst.
			link, err := os.Readlink(path)
			if err != nil {
				return err
			}
			resolved := link
			if !filepath.IsAbs(link) {
				// A relative link is relative to the link's own directory.
				resolved = filepath.Join(filepath.Dir(path), link)
			}
			resolvedInfo, err := os.Stat(resolved)
			if err != nil {
				return err
			}
			perm := resolvedInfo.Mode().Perm()
			if resolvedInfo.IsDir() {
				return os.MkdirAll(target, perm)
			}
			return copyFile(resolved, target, perm)
		}

		info, err := entry.Info()
		if err != nil {
			return err
		}
		return copyFile(path, target, info.Mode().Perm())
	}
	return filepath.WalkDir(src, visit)
}
// copyFile copies src -> dst with the given mode. The destination's
// parent is created on demand so a deeply nested template root works
// without the caller pre-creating directories.
//
// `src` and `dst` come from an operator-supplied template tree
// walked by filepath.WalkDir; gosec G304/G306 fire on the variable
// open / Chmod paths but that is the entire point of the helper
// (we're materializing an operator-named directory), so we suppress
// the warnings inline.
func copyFile(src, dst string, mode fs.FileMode) error {
	parent := filepath.Dir(dst)
	if err := os.MkdirAll(parent, 0o750); err != nil {
		return err
	}

	in, err := os.Open(src) //nolint:gosec // src walks an operator-supplied template tree
	if err != nil {
		return err
	}
	defer func() { _ = in.Close() }()

	// O_TRUNC discards whatever a previous, interrupted copy left in dst,
	// so the result is never a mix of old and new content.
	flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
	out, err := os.OpenFile(dst, flags, mode) //nolint:gosec // dst is the operator-supplied data-dir
	if err != nil {
		return err
	}

	_, err = io.Copy(out, in)
	closeErr := out.Close()
	if err != nil {
		// The copy error takes precedence; the close error is dropped.
		return err
	}
	return closeErr
}
// Command check-disposition-parity verifies that the per-node
// disposition table the engine consumes
// (`backend/engine/duckdb/transpiler/node_dispositions.yaml`)
// agrees row-for-row with the human-readable mirror in
// `backend/engine/duckdb/transpiler/SHAPE_TRACKER.md`.
//
// The two files MUST stay in lock-step: the YAML is the
// machine-readable source of truth the engine router and the
// generated `node_dispositions_table.inc` consume; the markdown is
// the human-readable mirror referenced from ROADMAP.md and the
// per-plan docs. A drift between them means either the engine and
// the docs disagree on what route a node kind takes, or the
// `(planned)` annotation in the docs has stopped tracking reality.
//
// Usage (typical):
//
// go run ./tools/check_disposition_parity # check repo files
// go run ./tools/check_disposition_parity \
// --yaml=path/to/node_dispositions.yaml \
// --shape-tracker=path/to/SHAPE_TRACKER.md
//
// Exit codes mirror the Go `flag` package convention:
//
// - 0: tables agree
// - 1: drift detected (the report names every offending row)
// - 2: usage error (missing or unreadable input file)
//
// The deliberately-thin CLI surface keeps the checker drop-in
// runnable from `task lint:dispositions` (the canonical
// developer-facing entry point) and from the CI lint job, with no
// extra flags needed for the common case.
package main
import (
"errors"
"flag"
"fmt"
"io"
"os"
"path/filepath"
)
// Default input locations, written with forward slashes. resolvePaths
// converts them with filepath.FromSlash and uses them when the matching
// flag is empty.
const (
defaultYAMLRel = "backend/engine/duckdb/transpiler/node_dispositions.yaml"
defaultShapeTrackerRel = "backend/engine/duckdb/transpiler/SHAPE_TRACKER.md"
)
// Sentinel errors returned by run. main maps errUsage to exit code 2 and
// errFindings to exit code 1, and prints neither of them.
var (
errUsage = errors.New("usage error")
errFindings = errors.New("parity findings")
)
// main runs the checker on the process arguments and turns run's result
// into an exit code: 2 for errUsage, 1 for errFindings and for any other
// error. Only an error that is neither sentinel is printed here.
func main() {
	err := run(os.Args[1:], os.Stdout, os.Stderr)
	if err == nil {
		return
	}
	if errors.Is(err, errUsage) {
		os.Exit(2)
	}
	if !errors.Is(err, errFindings) {
		_, _ = fmt.Fprintf(os.Stderr, "check-disposition-parity: %v\n", err)
	}
	os.Exit(1)
}
// run is the testable entry point. It returns an error rather than
// calling os.Exit so tests can drive the full code path with
// table-driven fixtures.
//
// args are the command-line arguments without the program name. The
// success summary goes to stdout; flag errors, usage problems and parity
// findings go to stderr.
//
// It returns nil when both tables agree, errFindings when they disagree
// (each finding is printed first), and errUsage for bad flags, unexpected
// positional arguments, or an input file that is missing or cannot be
// read. A file that exists but fails to read (for example a directory or
// a permission problem) is reported as errUsage too, matching the
// documented exit code 2 for unreadable inputs. Parse failures are
// returned as ordinary wrapped errors.
func run(args []string, stdout, stderr io.Writer) error {
	fs := flag.NewFlagSet("check-disposition-parity", flag.ContinueOnError)
	fs.SetOutput(stderr)
	yamlPath := fs.String("yaml", "",
		"Path to node_dispositions.yaml (defaults to the repo-relative "+
			"path under the current working directory).")
	shapePath := fs.String("shape-tracker", "",
		"Path to SHAPE_TRACKER.md (defaults to the repo-relative path "+
			"under the current working directory).")
	if err := fs.Parse(args); err != nil {
		// The flag package has already written its message to stderr.
		return errUsage
	}
	if fs.NArg() > 0 {
		_, _ = fmt.Fprintf(stderr,
			"check-disposition-parity: unexpected positional args: %v\n",
			fs.Args())
		return errUsage
	}
	yp, sp, err := resolvePaths(*yamlPath, *shapePath)
	if err != nil {
		_, _ = fmt.Fprintf(stderr, "check-disposition-parity: %v\n", err)
		return errUsage
	}
	// gosec G304: this CLI takes user-controlled paths by design (the
	// --yaml / --shape-tracker flags); the same posture applies as
	// the rest of the in-repo lint tooling that consumes file lists.
	yamlBytes, err := os.ReadFile(yp) //nolint:gosec // user-controlled lint input
	if err != nil {
		// An input that stats fine but cannot be read is still an
		// unreadable input file, i.e. a usage error.
		_, _ = fmt.Fprintf(stderr, "check-disposition-parity: read %s: %v\n", yp, err)
		return errUsage
	}
	shapeBytes, err := os.ReadFile(sp) //nolint:gosec // user-controlled lint input
	if err != nil {
		_, _ = fmt.Fprintf(stderr, "check-disposition-parity: read %s: %v\n", sp, err)
		return errUsage
	}
	yamlRows, err := parseYAML(string(yamlBytes))
	if err != nil {
		return fmt.Errorf("parse %s: %w", yp, err)
	}
	shapeRows, err := parseShapeTracker(string(shapeBytes))
	if err != nil {
		return fmt.Errorf("parse %s: %w", sp, err)
	}
	findings := compareParity(yamlRows, shapeRows)
	if len(findings) == 0 {
		_, _ = fmt.Fprintf(stdout,
			"check-disposition-parity: ok (%d YAML rows, %d SHAPE_TRACKER rows)\n",
			len(yamlRows), len(shapeRows))
		return nil
	}
	for _, f := range findings {
		_, _ = fmt.Fprintln(stderr, f)
	}
	_, _ = fmt.Fprintf(stderr,
		"check-disposition-parity: %d disagreement(s) between %s and %s\n",
		len(findings), yp, sp)
	return errFindings
}
// resolvePaths picks the two input paths. An empty flag falls back to
// the well-known repo-relative default, which is resolved against the
// current working directory (callers run from the repo root by
// convention). Each chosen path must pass os.Stat; the YAML path is
// checked first. On failure both returned paths are empty and the error
// names the offending flag.
func resolvePaths(yamlFlag, shapeFlag string) (yp, sp string, err error) {
	yamlChoice := yamlFlag
	if yamlChoice == "" {
		yamlChoice = filepath.FromSlash(defaultYAMLRel)
	}
	shapeChoice := shapeFlag
	if shapeChoice == "" {
		shapeChoice = filepath.FromSlash(defaultShapeTrackerRel)
	}

	_, statErr := os.Stat(yamlChoice)
	if statErr != nil {
		return "", "", fmt.Errorf("--yaml path %q is not readable: %w", yamlChoice, statErr)
	}
	_, statErr = os.Stat(shapeChoice)
	if statErr != nil {
		return "", "", fmt.Errorf("--shape-tracker path %q is not readable: %w", shapeChoice, statErr)
	}
	return yamlChoice, shapeChoice, nil
}
package main
import (
"fmt"
"sort"
"strings"
)
// Route name constants. Shared with
// `backend/engine/disposition.h`, `node_dispositions.yaml`, and
// `SHAPE_TRACKER.md`. Adding a new route means editing the enum,
// both data files, the generators, and this file in lock-step.
//
// Each value is the exact lowercase token parseYAML accepts as a
// disposition (through validDispositions).
const (
routeDuckdbNative = "duckdb_native"
routeDuckdbRewrite = "duckdb_rewrite"
routeDuckdbUDF = "duckdb_udf"
routeSemanticExecutor = "semantic_executor"
routeControlOp = "control_op"
routeLocalStub = "local_stub"
routeUnsupported = "unsupported"
)
// validDispositions is the closed set of route names. parseYAML rejects
// any disposition token that is not a key of this map.
var validDispositions = map[string]struct{}{
routeDuckdbNative: {},
routeDuckdbRewrite: {},
routeDuckdbUDF: {},
routeSemanticExecutor: {},
routeControlOp: {},
routeLocalStub: {},
routeUnsupported: {},
}
// yamlRow is a single entry from node_dispositions.yaml, as produced by
// parseYAML.
type yamlRow struct {
// node is the GoogleSQL ResolvedAST class name (no backticks,
// case-sensitive). parseYAML stores the text before the first `:`,
// trimmed of surrounding whitespace.
node string
// disposition is the lowercase route name (one of
// validDispositions).
disposition string
// lineNumber points back at the source YAML line for diagnostics.
// It is 1-based.
lineNumber int
}
// shapeRow is a single row from the SHAPE_TRACKER.md tables.
// NOTE(review): presumably produced by parseShapeTracker, which is not
// visible in this part of the file; confirm there.
type shapeRow struct {
// nodes is the list of node-kind tokens the row's `Node` cell
// covered (composite cells like `Foo / Bar` and wildcards like
// `ResolvedGraph*Scan` expand into multiple tokens; wildcards
// keep their `*` and are matched against the YAML by
// matchesWildcard below).
nodes []string
// disposition is the lowercase route name from the row's
// `Status` cell (one of validDispositions). Any suffix the
// status cell carried (e.g. `(subset)`) is stripped.
disposition string
// lineNumber points back at the source markdown line for
// diagnostics.
lineNumber int
}
// parseYAML reads node_dispositions.yaml one line at a time.
//
// A deliberately tiny hand-written reader is used instead of
// gopkg.in/yaml.v3 so the parity check keeps the same "no extra deps
// to bootstrap" property as the awk-based table generator. Each
// non-blank line (after inline comments are removed) must follow
//
//	<NodeKind>: <disposition> [status=planned]
//
// Only `<NodeKind>` and `<disposition>` are retained; any trailing
// metadata words are ignored because they do not affect parity.
//
// The first malformed line aborts the parse with an error that
// carries its 1-based line number: a missing `:`, an empty key, a
// missing disposition, or a disposition outside validDispositions.
func parseYAML(src string) ([]yamlRow, error) {
	var parsed []yamlRow
	for idx, original := range strings.Split(src, "\n") {
		num := idx + 1
		text := strings.TrimSpace(stripInlineComment(original))
		if text == "" {
			continue
		}
		sep := strings.Index(text, ":")
		if sep < 0 {
			return nil, fmt.Errorf("line %d: missing `:` separator: %q",
				num, original)
		}
		node := strings.TrimSpace(text[:sep])
		if node == "" {
			return nil, fmt.Errorf("line %d: empty node-kind key: %q",
				num, original)
		}
		// Fields ignores leading/trailing blanks, so the value side
		// needs no separate trim.
		words := strings.Fields(text[sep+1:])
		if len(words) == 0 {
			return nil, fmt.Errorf("line %d: missing disposition for %q",
				num, node)
		}
		route := words[0]
		if _, known := validDispositions[route]; !known {
			return nil, fmt.Errorf(
				"line %d: unknown disposition %q for node %q",
				num, route, node)
		}
		parsed = append(parsed, yamlRow{
			node:        node,
			disposition: route,
			lineNumber:  num,
		})
	}
	return parsed, nil
}
// stripInlineComment cuts a trailing `# ...` comment off a YAML line.
// A `#` only starts a comment when it sits at column 0 or directly
// after a space or tab; a `#` glued to preceding text is kept. That
// narrow rule is sufficient for the disposition tables (node names
// and disposition words never contain `#`) and keeps the parser
// tiny. Whitespace before the comment is left in place for the
// caller to trim.
func stripInlineComment(s string) string {
	for pos, ch := range []byte(s) {
		if ch != '#' {
			continue
		}
		if pos == 0 {
			return ""
		}
		switch s[pos-1] {
		case ' ', '\t':
			return s[:pos]
		}
	}
	return s
}
// parseShapeTracker extracts the per-node disposition table rows
// from SHAPE_TRACKER.md.
//
// Only lines that start with `|` are considered. A row is kept when
// its first (Node) cell yields at least one `Resolved`-prefixed
// identifier via extractNodes; that filters out header rows and any
// other table whose first column is not a node name. The second
// (Status) cell must then contain a backticked disposition, otherwise
// the parse fails with the 1-based line number of the offending row.
//
// Delimiter rows are recognised structurally (every cell is made of
// `-` and `:` only) rather than by searching the whole line for
// `---`, so a data row that merely mentions `---` in a later cell is
// no longer dropped silently.
func parseShapeTracker(src string) ([]shapeRow, error) {
	var rows []shapeRow
	for lineNo, raw := range strings.Split(src, "\n") {
		line := strings.TrimSpace(raw)
		if !strings.HasPrefix(line, "|") {
			continue
		}
		cells := splitMarkdownRow(line)
		if len(cells) < 2 || isTableSeparatorRow(cells) {
			continue
		}
		// The header row's first cell is `Node`, which extractNodes
		// rejects (as it does any other non-Resolved first cell).
		nodes := extractNodes(strings.TrimSpace(cells[0]))
		if len(nodes) == 0 {
			continue
		}
		disposition, err := extractDisposition(strings.TrimSpace(cells[1]))
		if err != nil {
			return nil, fmt.Errorf("line %d: %w", lineNo+1, err)
		}
		rows = append(rows, shapeRow{
			nodes:       nodes,
			disposition: disposition,
			lineNumber:  lineNo + 1,
		})
	}
	return rows, nil
}

// isTableSeparatorRow reports whether cells form a pipe-table
// delimiter row, i.e. every cell contains at least one `-` and
// nothing but `-` and `:` (`---`, `:---`, `:---:`, ...).
func isTableSeparatorRow(cells []string) bool {
	if len(cells) == 0 {
		return false
	}
	for _, c := range cells {
		c = strings.TrimSpace(c)
		if !strings.Contains(c, "-") || strings.Trim(c, "-:") != "" {
			return false
		}
	}
	return true
}
// splitMarkdownRow breaks a `| a | b | c |` pipe-table row into its
// raw cells. One leading and one trailing `|` are removed if present
// and the remainder is split on every `|`; cells are returned
// untrimmed. Pipes inside backticks are not treated specially:
// SHAPE_TRACKER.md does not use them, so a plain split is preferred
// over pulling in a real markdown parser.
func splitMarkdownRow(line string) []string {
	if strings.HasPrefix(line, "|") {
		line = line[1:]
	}
	if strings.HasSuffix(line, "|") {
		line = line[:len(line)-1]
	}
	return strings.Split(line, "|")
}
// extractNodes returns the class identifiers named in a `Node` cell.
// It understands:
//
//   - a single class name, optionally backticked (`ResolvedQueryStmt`)
//   - slash-joined composite cells (`Foo` / `Bar`)
//   - wildcard families (`ResolvedGraph*Scan`); a markdown-escaped
//     `\*` is unescaped to `*` first
//
// Empty pieces are ignored. If any non-empty piece does not start
// with `Resolved` (for instance the header row's literal "Node" or a
// delimiter's `---`), the whole cell is rejected and nil is returned
// so the caller skips the row.
func extractNodes(cell string) []string {
	unescaped := strings.ReplaceAll(cell, "\\*", "*")
	var names []string
	for _, piece := range strings.Split(unescaped, "/") {
		name := strings.TrimSpace(strings.Trim(strings.TrimSpace(piece), "`"))
		switch {
		case name == "":
			// Nothing between two slashes (or an empty cell): skip.
		case strings.HasPrefix(name, "Resolved"):
			names = append(names, name)
		default:
			return nil
		}
	}
	return names
}
// extractDisposition returns the canonical route name found in a
// Status cell. Status cells look like:
//
//	`duckdb_native`
//	`duckdb_native` (subset)
//	`duckdb_native` (planned)
//
// The text between the first pair of backticks is taken (trimmed of
// whitespace); anything after it, such as `(subset)` or `(planned)`,
// is ignored. An error is returned when the cell has no backtick,
// when the opening backtick is never closed, or when the quoted word
// is not a key of validDispositions.
func extractDisposition(cell string) (string, error) {
	_, afterOpen, found := strings.Cut(cell, "`")
	if !found {
		return "", fmt.Errorf("status cell has no backticked disposition: %q", cell)
	}
	quoted, _, found := strings.Cut(afterOpen, "`")
	if !found {
		return "", fmt.Errorf("unterminated backtick in status cell: %q", cell)
	}
	route := strings.TrimSpace(quoted)
	if _, known := validDispositions[route]; !known {
		return "", fmt.Errorf("unknown disposition %q in status cell: %q",
			route, cell)
	}
	return route, nil
}
// compareParity cross-checks the YAML rows against the SHAPE_TRACKER
// rows and returns the findings as a lexically sorted list; the list
// is empty when both sources agree. Three kinds of finding exist:
//
//  1. A SHAPE_TRACKER node has no matching YAML row.
//  2. A SHAPE_TRACKER node's disposition disagrees with the YAML row.
//  3. A YAML node has no matching SHAPE_TRACKER row.
//
// SHAPE_TRACKER wildcards (`ResolvedGraph*Scan`) match every YAML
// node whose name fits the pattern, and every matched YAML row must
// carry the wildcard row's disposition.
func compareParity(yaml []yamlRow, shape []shapeRow) []string {
	// Exact-name index for the non-wildcard tokens. If the YAML
	// repeats a node name, the last occurrence is the one indexed.
	index := make(map[string]yamlRow, len(yaml))
	for i := range yaml {
		index[yaml[i].node] = yaml[i]
	}

	// covered records which YAML nodes some SHAPE_TRACKER row touched.
	covered := make(map[string]bool, len(yaml))
	var findings []string
	for i := range shape {
		findings = append(findings, walkShapeRow(shape[i], yaml, index, covered)...)
	}

	// Whatever was never touched is missing from SHAPE_TRACKER.md.
	for i := range yaml {
		entry := yaml[i]
		if !covered[entry.node] {
			findings = append(findings, fmt.Sprintf(
				"node_dispositions.yaml line %d has %s -> %s but "+
					"SHAPE_TRACKER.md has no matching row",
				entry.lineNumber, entry.node, entry.disposition))
		}
	}
	sort.Strings(findings)
	return findings
}
// walkShapeRow checks every token of one SHAPE_TRACKER row against
// the YAML and returns the resulting findings. Tokens containing `*`
// are delegated to expandWildcard; all other tokens are looked up by
// exact name in yamlByName. Every YAML node that is matched is
// recorded in `seen` (mutated in place). Split out of compareParity
// to keep that function under the funlen linter cap and to allow the
// exact and wildcard branches to be tested on their own.
func walkShapeRow(
	row shapeRow,
	yaml []yamlRow,
	yamlByName map[string]yamlRow,
	seen map[string]bool,
) []string {
	var findings []string
	for _, name := range row.nodes {
		if strings.IndexByte(name, '*') >= 0 {
			findings = append(findings, expandWildcard(row, name, yaml, seen)...)
			continue
		}
		match, found := yamlByName[name]
		switch {
		case !found:
			findings = append(findings, fmt.Sprintf(
				"SHAPE_TRACKER.md line %d references %s but "+
					"node_dispositions.yaml has no matching row",
				row.lineNumber, name))
		default:
			seen[match.node] = true
			if match.disposition != row.disposition {
				findings = append(findings, mismatchFinding(row, match, name))
			}
		}
	}
	return findings
}
// expandWildcard evaluates one `Foo*Bar`-style SHAPE_TRACKER token
// against every YAML row. Each YAML node the pattern matches is
// marked in `seen` and yields a mismatch finding when its disposition
// differs from the wildcard row's. A pattern that matches no YAML row
// at all yields a single "no matching row" finding.
func expandWildcard(
	row shapeRow,
	token string,
	yaml []yamlRow,
	seen map[string]bool,
) []string {
	var findings []string
	hits := 0
	for i := range yaml {
		candidate := yaml[i]
		if matchesWildcard(token, candidate.node) {
			hits++
			seen[candidate.node] = true
			if candidate.disposition != row.disposition {
				findings = append(findings, mismatchFinding(row, candidate, candidate.node))
			}
		}
	}
	if hits == 0 {
		findings = append(findings, fmt.Sprintf(
			"SHAPE_TRACKER.md line %d references wildcard %s "+
				"but node_dispositions.yaml has no matching row",
			row.lineNumber, token))
	}
	return findings
}
// mismatchFinding formats the one canonical "disposition mismatch"
// message. displayName is the node name shown on the SHAPE_TRACKER
// side of the message; the YAML side always shows yr.node. Keeping
// the format here means the call sites in walkShapeRow and
// expandWildcard cannot drift apart.
func mismatchFinding(row shapeRow, yr yamlRow, displayName string) string {
	const layout = "disposition mismatch: SHAPE_TRACKER.md line %d says %s -> %s, " +
		"node_dispositions.yaml line %d says %s -> %s"
	return fmt.Sprintf(layout,
		row.lineNumber, displayName, row.disposition,
		yr.lineNumber, yr.node, yr.disposition)
}
// matchesWildcard reports whether `name` fits the glob-style
// `pattern`. `*` is the only metacharacter and stands for any run of
// characters, including none; a pattern without `*` must equal the
// name exactly. SHAPE_TRACKER only ever uses this form (for example
// `ResolvedGraph*Scan`).
//
// The pattern is split on `*`: the first segment must be a prefix of
// the name, the last segment a suffix of what remains, and each
// segment in between is located left to right at its first
// occurrence.
func matchesWildcard(pattern, name string) bool {
	segments := strings.Split(pattern, "*")
	if len(segments) == 1 {
		return name == pattern
	}
	head, tail := segments[0], segments[len(segments)-1]
	if !strings.HasPrefix(name, head) {
		return false
	}
	remaining := name[len(head):]
	for _, mid := range segments[1 : len(segments)-1] {
		at := strings.Index(remaining, mid)
		if at < 0 {
			return false
		}
		remaining = remaining[at+len(mid):]
	}
	return strings.HasSuffix(remaining, tail)
}
package main
import (
"encoding/json"
"fmt"
"io"
"os"
"strconv"
)
// badgeJSON matches the shape shields.io's "endpoint" badge consumer
// requires. See https://shields.io/badges/endpoint-badge for the
// schema. We deliberately keep this struct narrow (no `cacheSeconds`
// override etc) because the README badges use shields.io's defaults
// and we don't want the badge JSON to grow extra knobs without a
// reason.
type badgeJSON struct {
// SchemaVersion is always set to 1 by runBadge.
SchemaVersion int `json:"schemaVersion"`
// Label is the badge's left-hand text (the --label flag).
Label string `json:"label"`
// Message is the badge's right-hand text, produced by badgeMessage.
Message string `json:"message"`
// Color is the shields.io colour name, produced by badgeColor.
Color string `json:"color"`
}
// Colour names for the badges, annotated with the percentage band
// badgeColor maps to each one. These match the colour bands codecov
// uses on its default badges to keep the visual transition from the
// old badges to the new ones smooth.
const (
colorBrightGreen = "brightgreen" // [90, 100]
colorGreen = "green" // [80, 90)
colorYellowGreen = "yellowgreen" // [70, 80)
colorYellow = "yellow" // [60, 70)
colorOrange = "orange" // [40, 60)
colorRed = "red" // [0, 40)
colorLightgrey = "lightgrey" // missing data
)
// badgeColor picks the shields.io colour string for a percentage.
// Any negative value (in practice the -1 missingFlag sentinel) maps
// to "lightgrey", so a flag without data shows up as a greyed-out
// badge instead of a "0% red" one that would suggest the suite ran
// and failed. Otherwise the first band whose lower bound the
// percentage reaches wins, and anything under 40 is red.
func badgeColor(pct float64) string {
	if pct < 0 {
		return colorLightgrey
	}
	// Bands ordered from the highest lower bound to the lowest.
	bands := [...]struct {
		atLeast float64
		color   string
	}{
		{90, colorBrightGreen},
		{80, colorGreen},
		{70, colorYellowGreen},
		{60, colorYellow},
		{40, colorOrange},
	}
	for _, band := range bands {
		if pct >= band.atLeast {
			return band.color
		}
	}
	return colorRed
}
// badgeMessage formats the percentage with exactly one decimal place
// followed by `%` (for example "71.3%"), the same tight format as the
// codecov badges being replaced. A negative value, i.e. the
// missing-data sentinel, is rendered as missingMessage instead.
func badgeMessage(pct float64) string {
	switch {
	case pct < 0:
		return missingMessage
	default:
		digits := strconv.AppendFloat(make([]byte, 0, 8), pct, 'f', 1, 64)
		return string(append(digits, '%'))
	}
}
// pickField returns the percentage of Summary s selected by the
// --field name: "total", "go" or "cpp". Any other name is an error,
// so that a typo in the workflow cannot silently fall back to the
// total.
func pickField(s *Summary, field string) (float64, error) {
	if field == "total" {
		return s.Total, nil
	}
	if field == "go" {
		return s.Go, nil
	}
	if field == "cpp" {
		return s.CPP, nil
	}
	return 0, fmt.Errorf("unknown --field %q (want one of: total, go, cpp)", field)
}
// runBadge implements `coverage badge`. It loads the summary.json
// named by --in (as written by `coverage summarize`), selects the
// percentage named by --field, and emits the shields.io endpoint JSON
// to --out, or to stdout when --out is empty.
//
// A missing --in prints a hint plus the flag usage to stderr and
// returns errUsage; flag-parse, read and unknown-field errors are
// returned unchanged.
func runBadge(args []string, stdout, stderr io.Writer) error {
	fs := flagSet("badge", stderr)
	var (
		inPath  = fs.String("in", "", "input summary.json path (required)")
		outPath = fs.String("out", "", "output badge JSON path (default: stdout)")
		field   = fs.String("field", "total", "summary field to render (total|go|cpp)")
		label   = fs.String("label", "coverage", "badge label")
	)
	if err := fs.Parse(args); err != nil {
		return err
	}
	if len(*inPath) == 0 {
		_, _ = fmt.Fprintln(stderr, "badge: --in is required")
		fs.Usage()
		return errUsage
	}

	loaded, err := readSummary(*inPath)
	if err != nil {
		return err
	}
	value, err := pickField(loaded, *field)
	if err != nil {
		return err
	}
	return emitBadge(&badgeJSON{
		SchemaVersion: 1,
		Label:         *label,
		Message:       badgeMessage(value),
		Color:         badgeColor(value),
	}, *outPath, stdout)
}
// emitBadge serialises b as indented JSON with a trailing newline.
// With a non-empty outPath the bytes are written to that file (mode
// 0o644); otherwise they go to stdout. Marshal and file-write errors
// are wrapped; a stdout write error is returned as is.
func emitBadge(b *badgeJSON, outPath string, stdout io.Writer) error {
	encoded, err := json.MarshalIndent(b, "", " ")
	if err != nil {
		return fmt.Errorf("marshal badge: %w", err)
	}
	encoded = append(encoded, '\n')

	if outPath != "" {
		//nolint:gosec // 0o644 is the right mode for a CI-published JSON artifact.
		if werr := os.WriteFile(outPath, encoded, 0o644); werr != nil {
			return fmt.Errorf("write %q: %w", outPath, werr)
		}
		return nil
	}
	_, err = stdout.Write(encoded)
	return err
}
package main
import (
"errors"
"fmt"
"io"
"os"
"strings"
)
// errRegression is returned by runGate when one or more tracked
// fields regressed beyond the configured tolerance or fell below the
// configured floor. It is not a usage error, so main reports it and
// exits with status 1, which makes CI mark the check as failed.
var errRegression = errors.New("coverage regression")
// gateResult is the per-field outcome the gate prints to stdout. We
// always print one row per tracked flag (total, go, cpp) so the GitHub
// step-summary table is consistent across runs, even when a flag
// happens to be missing in either the current or baseline summary.
// Values are filled in by evalField.
type gateResult struct {
// field is the tracked flag name ("total", "go" or "cpp").
field string
// current and baseline are the two percentages being compared; a
// negative value means "no data" (see missingCurr / missingBase).
current float64
baseline float64
// tolerance and floor are the limits the field was checked against.
tolerance float64
floor float64
regression float64 // baseline - current; positive means we went down
// belowFloor is set when a floor is enabled (> 0) and current is
// under it.
belowFloor bool
// overTol is set when the regression exceeds the tolerance.
overTol bool
// missingCurr / missingBase record that the respective input was
// negative; when either is set the field is not evaluated at all.
missingCurr bool
missingBase bool
}
// failed reports whether this field trips the gate, i.e. whether it
// regressed past its tolerance or dropped below its floor.
func (r gateResult) failed() bool {
	if r.overTol {
		return true
	}
	return r.belowFloor
}
// formatPct renders a single percentage cell of the gate table with
// one decimal place and a `%` suffix. Negative (missing) values
// become missingMessage, mirroring the badge command so both outputs
// use the same "absent value" spelling.
func formatPct(v float64) string {
	switch {
	case v < 0:
		return missingMessage
	default:
		return fmt.Sprintf("%.1f", v) + "%"
	}
}
// runGate implements `coverage gate`. It reads the current and the
// baseline summary, evaluates the total, go and cpp fields against
// their tolerance and floor, writes a markdown-friendly table to
// stdout, and returns errRegression iff at least one field failed.
//
// If --baseline names a file that does not exist, every field is
// treated as having no baseline yet: a warning goes to stderr, the
// table is still printed, and nil is returned. That covers the
// bootstrap case in which gh-pages has not been populated yet.
//
// A missing --current or --baseline prints a hint plus the flag usage
// to stderr and returns errUsage.
//
//nolint:cyclop // straight-line flag handling; refactor would hurt readability.
func runGate(args []string, stdout, stderr io.Writer) error {
	fs := flagSet("gate", stderr)
	var (
		currentPath  = fs.String("current", "", "current summary.json path (required)")
		baselinePath = fs.String("baseline", "", "baseline summary.json path (required)")
		tol          = fs.Float64("tolerance", 1.0, "max allowed regression (percentage points) on the total field")
		floor        = fs.Float64("floor", 0.0, "absolute floor (percentage points) on the total field; 0 disables")
		goTol        = fs.Float64("go-tolerance", 1.0, "max allowed regression on the go field")
		goFloor      = fs.Float64("go-floor", 0.0, "absolute floor on the go field")
		cppTol       = fs.Float64("cpp-tolerance", 1.0, "max allowed regression on the cpp field")
		cppFloor     = fs.Float64("cpp-floor", 0.0, "absolute floor on the cpp field")
	)
	if err := fs.Parse(args); err != nil {
		return err
	}
	if *currentPath == "" || *baselinePath == "" {
		_, _ = fmt.Fprintln(stderr, "gate: --current and --baseline are both required")
		fs.Usage()
		return errUsage
	}

	now, err := readSummary(*currentPath)
	if err != nil {
		return err
	}
	base, bootstrap, err := loadBaseline(*baselinePath, stderr)
	if err != nil {
		return err
	}

	// One check per tracked field, in the order the table prints them.
	checks := []struct {
		name                  string
		cur, ref, limit, min float64
	}{
		{"total", now.Total, base.Total, *tol, *floor},
		{"go", now.Go, base.Go, *goTol, *goFloor},
		{"cpp", now.CPP, base.CPP, *cppTol, *cppFloor},
	}
	outcomes := make([]gateResult, 0, len(checks))
	for _, c := range checks {
		outcomes = append(outcomes, evalField(c.name, c.cur, c.ref, c.limit, c.min))
	}

	writeGateTable(stdout, outcomes, bootstrap)
	if bootstrap {
		return nil
	}
	for i := range outcomes {
		if outcomes[i].failed() {
			return errRegression
		}
	}
	return nil
}
// loadBaseline reads the baseline summary at path. The boolean result
// is true only in the bootstrap case: when the file does not exist, a
// notice is written to stderr and a Summary whose three percentages
// are all missingFlag is returned without error. Any other stat, read
// or decode failure is returned so CI surfaces the real problem
// instead of silently passing.
func loadBaseline(path string, stderr io.Writer) (*Summary, bool, error) {
	_, statErr := os.Stat(path)
	switch {
	case statErr == nil:
		loaded, err := readSummary(path)
		if err != nil {
			return nil, false, err
		}
		return loaded, false, nil
	case errors.Is(statErr, os.ErrNotExist):
		_, _ = fmt.Fprintf(stderr, "gate: baseline %q does not exist; treating as bootstrap (no gate).\n", path)
		placeholder := Summary{Total: missingFlag, Go: missingFlag, CPP: missingFlag}
		return &placeholder, true, nil
	default:
		return nil, false, fmt.Errorf("stat baseline %q: %w", path, statErr)
	}
}
// evalField runs the comparison for a single tracked field and
// returns the populated gateResult.
//
// Missing inputs collapse to "no opinion": if either current or
// baseline is negative (the missing sentinel), the field cannot fail
// the gate and neither regression nor the failure flags are set. This
// protects against, e.g., the C++ Bazel suite being temporarily
// disabled and the gate suddenly demanding a 0 -> 0 improvement on a
// flag with no real data.
//
// Otherwise regression is baseline - current, overTol is set when
// that exceeds tol, and belowFloor is set when floor is enabled (> 0)
// and current is under it. A regression of exactly tol is allowed.
func evalField(name string, current, baseline, tol, floor float64) gateResult {
	// toleranceSlack absorbs binary floating-point noise in the
	// subtraction below. The inputs are decimal percentages that are
	// not exactly representable, so a drop of precisely the tolerance
	// (64.9 -> 63.9 with tol 1.0) can evaluate to a hair more than
	// tol and would otherwise fail the gate spuriously. The slack is
	// far below the one-decimal resolution of the summaries.
	const toleranceSlack = 1e-9

	r := gateResult{
		field:       name,
		current:     current,
		baseline:    baseline,
		tolerance:   tol,
		floor:       floor,
		missingCurr: current < 0,
		missingBase: baseline < 0,
	}
	if r.missingCurr || r.missingBase {
		return r
	}
	r.regression = baseline - current
	r.overTol = r.regression > tol+toleranceSlack
	r.belowFloor = floor > 0 && current < floor
	return r
}
// writeGateTable renders the gate report as markdown: a heading, an
// optional "baseline missing" note, and a table with one row per
// result (formatted by formatRow). The whole report is assembled in
// memory and written to w in one call; the write error is ignored.
// Callers pass stdout so the output can be appended to
// `$GITHUB_STEP_SUMMARY` via `coverage gate ... >> $GITHUB_STEP_SUMMARY`.
func writeGateTable(w io.Writer, results []gateResult, baselineMissing bool) {
	const (
		heading  = "## Coverage gate\n\n"
		bootNote = "_Baseline missing; no regression gate enforced for this run._\n\n"
		columns  = "| field | current | baseline | delta | tolerance | floor | status |\n"
		divider  = "|-------|---------|----------|-------|-----------|-------|--------|\n"
	)
	var report strings.Builder
	report.WriteString(heading)
	if baselineMissing {
		report.WriteString(bootNote)
	}
	report.WriteString(columns + divider)
	for i := range results {
		report.WriteString(formatRow(results[i]))
	}
	_, _ = fmt.Fprint(w, report.String())
}
// formatRow renders one gateResult as a single markdown table row
// (terminated by a newline) with the columns field, current,
// baseline, delta, tolerance, floor and status.
//
// delta is the signed change current - baseline with one decimal, or
// "-" when either side is missing. status is "ok", "skipped (missing
// data)", or a FAIL message; when both limits are violated the
// tolerance failure is the one reported.
func formatRow(r gateResult) string {
	missing := r.missingCurr || r.missingBase

	delta := "-"
	if !missing {
		delta = fmt.Sprintf("%+.1f%%", -r.regression)
		// Negating a zero regression yields IEEE negative zero, which
		// %+.1f prints as "-0.0%"; tiny negative noise rounds to the
		// same text. Show an unchanged field as "+0.0%" instead.
		if delta == "-0.0%" {
			delta = "+0.0%"
		}
	}

	status := "ok"
	switch {
	case missing:
		status = "skipped (missing data)"
	case r.overTol:
		status = fmt.Sprintf("FAIL: regressed %.1fpp > %.1fpp tol", r.regression, r.tolerance)
	case r.belowFloor:
		status = fmt.Sprintf("FAIL: below floor %.1f%%", r.floor)
	}
	return fmt.Sprintf(
		"| %s | %s | %s | %s | %.1fpp | %.1f%% | %s |\n",
		r.field,
		formatPct(r.current),
		formatPct(r.baseline),
		delta,
		r.tolerance,
		r.floor,
		status,
	)
}
// Command coverage is the BigQuery emulator's self-hosted Codecov
// replacement. It ingests the two coverage artifacts produced by CI
// (Go `coverage.out` from `go test -coverprofile=...`, and the
// aggregated LCOV `.dat` from `bazel coverage --combined_report=lcov`),
// then emits the three pieces the gh-pages pipeline needs:
//
// 1. `summarize` writes a JSON summary with the overall percentage
// plus per-flag (go, cpp) percentages.
// 2. `badge` writes a shields.io endpoint JSON for one field of
// that summary so the README badges can be rendered
// dynamically from gh-pages without any external SaaS.
// 3. `gate` compares the current summary against the baseline
// published by the last `main` build and exits
// non-zero if any tracked percentage regressed beyond
// the configured tolerance or fell below an absolute
// floor.
//
// The binary has no external dependencies on purpose: it ships as part
// of the repo, runs in any environment that has a Go toolchain, and is
// trivial for contributors to reproduce locally via `task coverage:*`.
package main
import (
"errors"
"flag"
"fmt"
"io"
"os"
)
// Subcommand names, i.e. the values accepted as the first positional
// argument. Declared as constants so the dispatch switch in run() and
// the table-driven tests stay aligned (and so the goconst linter does
// not complain about the strings repeating across files).
const (
cmdSummarize = "summarize"
cmdBadge = "badge"
cmdGate = "gate"
)
// missingMessage is the literal rendered for missing-data percentages
// (negative values) across the badge and gate subcommands; it is
// returned by badgeMessage and formatPct. Centralised because the
// shields.io endpoint and the markdown step-summary share the same
// "absent value" convention.
const missingMessage = "n/a"
// run is the testable entry point. It dispatches on the subcommand
// (the first positional argument), handing the remaining arguments to
// the matching runXxx function, and reports failures through its
// return value rather than os.Exit so tests can drive the whole code
// path with table-driven fixtures.
//
// With no arguments or an unknown subcommand, the usage text goes to
// stderr and errUsage is returned. `-h`, `--help` and `help` print
// the usage text to stdout and succeed.
func run(args []string, stdout, stderr io.Writer) error {
	if len(args) == 0 {
		usage(stderr)
		return errUsage
	}
	switch sub := args[0]; sub {
	case "-h", "--help", "help":
		usage(stdout)
		return nil
	case cmdSummarize:
		return runSummarize(args[1:], stdout, stderr)
	case cmdBadge:
		return runBadge(args[1:], stdout, stderr)
	case cmdGate:
		return runGate(args[1:], stdout, stderr)
	default:
		_, _ = fmt.Fprintf(stderr, "coverage: unknown subcommand %q\n\n", sub)
	}
	usage(stderr)
	return errUsage
}
// errUsage signals that the caller passed an unrecognized or malformed
// invocation: no subcommand, an unknown subcommand, or a subcommand
// missing a required flag. The explanatory text is printed where the
// problem is detected, so main() only translates the error to exit
// code 2 (matching the Go `flag` package's convention) so wrappers can
// distinguish "you used it wrong" from a real failure.
var errUsage = errors.New("usage error")
// usage writes the top-level help text (tool summary plus the list
// of subcommands) to w. Write errors are deliberately ignored: there
// is nothing useful to do if the help text itself cannot be printed.
func usage(w io.Writer) {
_, _ = fmt.Fprint(w, `coverage - aggregate Go + C++ coverage for the self-hosted gh-pages pipeline.
Subcommands:
summarize Combine a Go coverage profile and/or an LCOV file into summary.json.
badge Emit a shields.io endpoint JSON for one field of summary.json.
gate Compare current summary to the baseline and fail on regression.
Run "coverage <subcommand> -h" for per-subcommand flags.
`)
}
// flagSet returns a FlagSet called name that writes its usage and
// error text to stderr and uses flag.ContinueOnError, so a parse
// failure comes back as an error from Parse instead of terminating
// the process. Building every subcommand's flags here keeps their
// behaviour uniform and keeps tests clear of the standard library's
// ExitOnError default.
func flagSet(name string, stderr io.Writer) *flag.FlagSet {
	set := flag.NewFlagSet(name, flag.ContinueOnError)
	set.SetOutput(stderr)
	return set
}
// main runs the CLI and maps run's result onto the process exit
// status:
//
//   - nil: exit 0.
//   - flag.ErrHelp: exit 0. A subcommand invoked with -h / -help has
//     already printed its flag usage via the FlagSet; asking for help
//     is not a failure, so no error line is printed.
//   - errUsage: exit 2, without an extra message because the usage
//     text was already written where the problem was detected.
//   - anything else: the error is printed to stderr and the exit
//     status is 1.
func main() {
	err := run(os.Args[1:], os.Stdout, os.Stderr)
	switch {
	case err == nil, errors.Is(err, flag.ErrHelp):
		return
	case errors.Is(err, errUsage):
		os.Exit(2)
	default:
		_, _ = fmt.Fprintf(os.Stderr, "coverage: %v\n", err)
		os.Exit(1)
	}
}
package main
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
)
// parseGoFile opens the Go coverage profile at path and returns
// (hitStatements, totalStatements) as computed by parseGoReader. The
// profile format is documented at
// https://pkg.go.dev/golang.org/x/tools/cover and consists of:
//
//   - an optional first line `mode: (set|count|atomic)`
//   - one record per covered statement block:
//     `<file>:<startLine>.<startCol>,<endLine>.<endCol> <numStmts> <count>`
//
// totalStatements is the sum of numStmts over all records and
// hitStatements the sum over the records whose count is > 0. This is
// intended to mirror the `total:` line of `go tool cover -func`
// without shelling out to it or depending on
// `golang.org/x/tools/cover`.
func parseGoFile(path string) (hits, total int64, err error) {
	//nolint:gosec // CLI tool; reading caller-supplied paths is the point.
	profile, openErr := os.Open(path)
	if openErr != nil {
		return 0, 0, fmt.Errorf("open: %w", openErr)
	}
	defer profile.Close() //nolint:errcheck // read-only; close errors are not actionable
	return parseGoReader(profile)
}
// parseGoReader parses a Go coverage profile from r; it exists so
// tests can exercise the parser without touching the filesystem.
// Blank lines and lines starting with `mode:` are skipped, every
// other line is handed to parseGoLine. Any malformed record aborts
// the parse with its 1-based line number and zero counts.
func parseGoReader(r io.Reader) (hits, total int64, err error) {
	sc := bufio.NewScanner(r)
	// bufio.Scanner's default token limit is 64 KiB and profiles from
	// very large monorepos can contain longer lines, so allow lines of
	// up to 1 MiB. Anything longer surfaces as a scan error below.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	for n := 1; sc.Scan(); n++ {
		record := strings.TrimSpace(sc.Text())
		if record == "" || strings.HasPrefix(record, "mode:") {
			continue
		}
		stmts, count, perr := parseGoLine(record)
		if perr != nil {
			return 0, 0, fmt.Errorf("line %d: %w", n, perr)
		}
		total += stmts
		if count > 0 {
			hits += stmts
		}
	}
	if serr := sc.Err(); serr != nil {
		return 0, 0, fmt.Errorf("scan: %w", serr)
	}
	return hits, total, nil
}
// parseGoLine reads the last two whitespace-separated fields of a
// coverage record as (numStatements, count). At least three fields
// are required; everything before the final two (the
// `file:start.col,end.col` slug) is ignored because the per-file
// breakdown only matters for `go tool cover -html`, which the
// publishing workflow runs separately. Both numbers must be base-10
// integers that fit in an int64.
func parseGoLine(line string) (stmts, count int64, err error) {
	fields := strings.Fields(line)
	n := len(fields)
	if n < 3 {
		return 0, 0, fmt.Errorf("unexpected field count %d in %q", n, line)
	}
	stmtsText, countText := fields[n-2], fields[n-1]
	if stmts, err = strconv.ParseInt(stmtsText, 10, 64); err != nil {
		return 0, 0, fmt.Errorf("parse stmts %q: %w", stmtsText, err)
	}
	if count, err = strconv.ParseInt(countText, 10, 64); err != nil {
		return 0, 0, fmt.Errorf("parse count %q: %w", countText, err)
	}
	return stmts, count, nil
}
package main
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
)
// parseLCOVFile opens the LCOV "tracefile" at path (the .dat that
// `bazel coverage --combined_report=lcov` deposits at
// bazel-out/_coverage/_coverage_report.dat) and returns the
// (hitLines, totalLines) computed by parseLCOVReader across the whole
// file.
//
// The counts come from the per-line `DA:<line>,<count>` records
// rather than from the `LH:` / `LF:` summaries, because some
// toolchains emit `LH` without `LF` (or vice versa) when a file has
// no executable lines. The summaries are only used as a fallback when
// the file contains no `DA:` record at all.
//
// The LCOV format is described in
// https://manpages.debian.org/testing/lcov/geninfo.1.en.html under
// "TRACEFILE FORMAT". The records that matter here are:
//
//	DA:<line>,<count>  // one DA per executable line
//	LH:<n> / LF:<n>    // per-file lines hit / lines found
//	end_of_record      // end of a source file record
//
// Anything else (SF, TN, FN, BRDA, ...) is ignored.
func parseLCOVFile(path string) (hits, total int64, err error) {
	//nolint:gosec // CLI tool; reading caller-supplied paths is the point.
	tracefile, openErr := os.Open(path)
	if openErr != nil {
		return 0, 0, fmt.Errorf("open: %w", openErr)
	}
	defer tracefile.Close() //nolint:errcheck // read-only; close errors are not actionable
	return parseLCOVReader(tracefile)
}
// parseLCOVReader parses an LCOV tracefile from r; it exists so tests
// can exercise the parser without touching the filesystem.
//
// Two tallies are kept side by side: one from the `DA:` records (a
// line counts as hit when its execution count is > 0) and one from
// the `LH:` / `LF:` summaries, which are folded in at every
// `end_of_record`. The DA tally is returned whenever at least one
// `DA:` record was seen; otherwise the summary tally is returned.
// A malformed DA, LH or LF record aborts the parse with its 1-based
// line number.
func parseLCOVReader(r io.Reader) (hits, total int64, err error) {
	sc := bufio.NewScanner(r)
	// Same generous limit as the Go profile parser: combined reports
	// from large C++ trees can carry lines (e.g. `BRDA:`) longer than
	// bufio.Scanner's 64 KiB default, so allow up to 1 MiB.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	var (
		daHit, daSeen        int64 // tallied from DA records
		sumHit, sumFound     int64 // tallied from LH / LF summaries
		pendingLH, pendingLF int64 // summaries of the record being read
	)
	for n := 1; sc.Scan(); n++ {
		rec := strings.TrimSpace(sc.Text())
		if rec == "end_of_record" {
			sumHit += pendingLH
			sumFound += pendingLF
			pendingLH, pendingLF = 0, 0
			continue
		}
		// All record types of interest have a three-byte tag.
		tag := rec
		if len(rec) > 3 {
			tag = rec[:3]
		}
		var perr error
		switch tag {
		case "DA:":
			var execs int64
			if execs, perr = parseDARecord(rec); perr == nil {
				daSeen++
				if execs > 0 {
					daHit++
				}
			}
		case "LH:":
			pendingLH, perr = parseSummaryCount(rec, "LH:")
		case "LF:":
			pendingLF, perr = parseSummaryCount(rec, "LF:")
		}
		if perr != nil {
			return 0, 0, fmt.Errorf("line %d: %w", n, perr)
		}
	}
	if serr := sc.Err(); serr != nil {
		return 0, 0, fmt.Errorf("scan: %w", serr)
	}
	if daSeen > 0 {
		return daHit, daSeen, nil
	}
	return sumHit, sumFound, nil
}
// parseSummaryCount parses the integer that follows prefix on an
// LCOV summary line such as `LH:12` or `LF:40`. Whitespace around the
// number is ignored. On failure the error names the record type
// (prefix without its trailing `:`) and quotes the whole line.
func parseSummaryCount(line, prefix string) (int64, error) {
	digits := strings.TrimSpace(strings.TrimPrefix(line, prefix))
	n, err := strconv.ParseInt(digits, 10, 64)
	if err == nil {
		return n, nil
	}
	label := strings.TrimSuffix(prefix, ":")
	return 0, fmt.Errorf("parse %s record %q: %w", label, line, err)
}
// parseDARecord returns the execution count of a
// `DA:<line>,<count>[,<checksum>]` record. The count is the text
// between the first comma and the second one (or the end of the
// line); the line number is not validated and the optional checksum
// that geninfo adds under `--checksum` is ignored. A record without
// any comma, or with a non-integer count, is an error.
func parseDARecord(line string) (count int64, err error) {
	payload := strings.TrimPrefix(line, "DA:")
	_, tail, ok := strings.Cut(payload, ",")
	if !ok {
		return 0, fmt.Errorf("malformed DA record %q", line)
	}
	field, _, _ := strings.Cut(tail, ",")
	count, err = strconv.ParseInt(field, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("parse count %q in %q: %w", field, line, err)
	}
	return count, nil
}
package main
import (
"encoding/json"
"fmt"
"io"
"os"
"time"
)
// Summary is the JSON shape that lands at gh-pages:baseline.json after
// every main-branch build and at workflow_run artifact downloads for
// PR gating. Percentages are on a 0..100 scale, rounded to one
// decimal place. Per-flag fields use -1 (missingFlag) when no input
// was supplied for that flag so downstream consumers (badge, gate)
// can distinguish "missing" from "really zero coverage".
type Summary struct {
// Total is the union percentage across every flag the summarize
// command consumed: the hit and total counts of all supplied
// inputs are pooled before dividing. When only one flag is
// supplied it matches that flag's percentage exactly.
Total float64 `json:"total"`
// Go is the percentage from the Go `coverage.out` profile.
Go float64 `json:"go"`
// CPP is the percentage from the Bazel/LCOV combined report.
CPP float64 `json:"cpp"`
// Commit is the git SHA the summary describes, if known; omitted
// from the JSON when empty.
Commit string `json:"commit,omitempty"`
// Timestamp is RFC3339 UTC; omitted from the JSON when empty.
Timestamp string `json:"timestamp,omitempty"`
}
// missingFlag is the sentinel the consumers use to detect "no input
// for this flag was supplied". -1 is impossible for a real percentage,
// so it round-trips through JSON unambiguously. Consumers test for it
// with a `< 0` comparison rather than equality.
const missingFlag = -1.0
// percentage returns hits/total on a 0..100 scale, rounded to one
// decimal place via roundTenths so the JSON file stays stable across
// runs that only differ in the noise floor. A total of zero or less
// yields 0 rather than a division by zero.
func percentage(hits, total int64) float64 {
	if total > 0 {
		ratio := float64(hits) / float64(total)
		return roundTenths(ratio * 100)
	}
	return 0
}
// roundTenths rounds v to one decimal place so summary.json does not
// churn on minor floating-point noise. It adds half a tenth and
// truncates, spelled out by hand instead of using `math.Round` so
// the behaviour is obvious from the source.
func roundTenths(v float64) float64 {
	tenths := int64(v*10 + 0.5)
	return float64(tenths) / 10
}
// runSummarize implements `coverage summarize`. At least one of --go
// and --lcov must be given; either may be left out (e.g. when the
// lcov producer failed in CI). A flag that was left out is reported
// as `missingFlag` in its per-flag field, and the total is computed
// by pooling the hit and total counts of the inputs that were given.
// The summary also records --commit and the current UTC time, and is
// written to --out, or to stdout when --out is empty.
//
//nolint:cyclop // straight-line option handling; splitting hurts readability.
func runSummarize(args []string, stdout, stderr io.Writer) error {
	fs := flagSet("summarize", stderr)
	var (
		goPath   = fs.String("go", "", "path to Go coverage profile (go test -coverprofile)")
		lcovPath = fs.String("lcov", "", "path to LCOV combined report (bazel coverage --combined_report=lcov)")
		outPath  = fs.String("out", "", "output JSON path (default: stdout)")
		commit   = fs.String("commit", "", "git commit SHA to record in summary.json (optional)")
	)
	if err := fs.Parse(args); err != nil {
		return err
	}
	haveGo, haveLCOV := *goPath != "", *lcovPath != ""
	if !haveGo && !haveLCOV {
		_, _ = fmt.Fprintln(stderr, "summarize: at least one of --go or --lcov is required")
		fs.Usage()
		return errUsage
	}

	result := Summary{
		Go:        missingFlag,
		CPP:       missingFlag,
		Commit:    *commit,
		Timestamp: time.Now().UTC().Format(time.RFC3339),
	}
	// Pooled counts across every input that was supplied.
	var hitSum, totalSum int64
	if haveGo {
		hits, total, err := parseGoFile(*goPath)
		if err != nil {
			return fmt.Errorf("parse go profile %q: %w", *goPath, err)
		}
		result.Go = percentage(hits, total)
		hitSum += hits
		totalSum += total
	}
	if haveLCOV {
		hits, total, err := parseLCOVFile(*lcovPath)
		if err != nil {
			return fmt.Errorf("parse lcov file %q: %w", *lcovPath, err)
		}
		result.CPP = percentage(hits, total)
		hitSum += hits
		totalSum += total
	}
	result.Total = percentage(hitSum, totalSum)
	return emitSummary(&result, *outPath, stdout)
}
// emitSummary serialises s as indented JSON followed by a trailing
// newline (so the output is friendly to `cat`). Key order is fixed by
// the declaration order of Summary's fields. With an empty outPath the
// bytes go to stdout; otherwise they are written to outPath with mode
// 0o644. Marshal and file-write failures are wrapped; a stdout write
// error is returned as-is.
func emitSummary(s *Summary, outPath string, stdout io.Writer) error {
	encoded, err := json.MarshalIndent(s, "", " ")
	if err != nil {
		return fmt.Errorf("marshal summary: %w", err)
	}
	encoded = append(encoded, '\n')

	if outPath != "" {
		//nolint:gosec // 0o644 is the right mode for a CI-published JSON artifact.
		if werr := os.WriteFile(outPath, encoded, 0o644); werr != nil {
			return fmt.Errorf("write %q: %w", outPath, werr)
		}
		return nil
	}

	_, err = stdout.Write(encoded)
	return err
}
// readSummary reads the file at path and decodes it as a Summary, the
// format runSummarize writes. It is shared by the badge and gate
// subcommands. Read and decode failures are wrapped with the path; on
// error the returned pointer is nil.
func readSummary(path string) (*Summary, error) {
	//nolint:gosec // CLI tool; reading caller-supplied paths is the point.
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("read %q: %w", path, err)
	}

	decoded := new(Summary)
	if err = json.Unmarshal(raw, decoded); err != nil {
		return nil, fmt.Errorf("decode %q: %w", path, err)
	}
	return decoded, nil
}
package main
import (
"bytes"
"fmt"
"io"
"regexp"
"sort"
"strings"
)
// Finding describes one source-only rule violation. Every check in
// this file reports its results as Findings, and the runner prints
// each through Format before exiting.
type Finding struct {
	// Rule is the stable identifier callers grep for, e.g.
	// `file-length`, `banned-logging`, `status-discarded`.
	Rule string
	// Path is repo-relative, matching the `cpp-lint list` output.
	Path string
	// Line is the 1-based line number; 0 marks a whole-file finding.
	Line int
	// Col is the 1-based column; 0 marks a line-level finding.
	Col int
	// Message is the human-readable explanation. It should fit on
	// one line; longer guidance belongs in the docs it links to.
	Message string
}

// Format renders the finding in the editor-friendly
// `path:line:col: rule: msg` shape. The column is dropped when Col is
// not positive, and both line and column are dropped when Line is not
// positive (a whole-file finding).
func (f Finding) Format() string {
	if f.Line <= 0 {
		return fmt.Sprintf("%s: %s: %s", f.Path, f.Rule, f.Message)
	}
	if f.Col <= 0 {
		return fmt.Sprintf("%s:%d: %s: %s", f.Path, f.Line, f.Rule, f.Message)
	}
	return fmt.Sprintf("%s:%d:%d: %s: %s", f.Path, f.Line, f.Col, f.Rule, f.Message)
}
// CheckOptions bundles the knobs every per-file check needs to
// know about. Centralised so the runner threads them through one
// argument instead of growing an ever-longer parameter list as new
// rules land.
type CheckOptions struct {
	// MaxFileLines is the whole-file line count above which a file
	// is rejected. 500 today (see plan thresholds); runCheck fills it
	// from `--max-lines`. A value of zero or less disables the
	// file-length rule (checkFileLength returns no findings).
	MaxFileLines int
}
// runOnce runs every source-only check against one file and returns
// the surviving findings. Findings silenced by an inline
// `// cpp-lint:allow(rule) ...` marker are removed first; the rest are
// stably sorted by (line, rule) so the output is deterministic no
// matter how the individual checks order their results.
func runOnce(path string, body []byte, opts CheckOptions) []Finding {
	lines := splitLines(body)

	var findings []Finding
	findings = append(findings, checkFileLength(path, body, opts)...)
	findings = append(findings, checkBannedLogging(path, lines)...)
	findings = append(findings, checkStatusAntiPatterns(path, lines)...)

	findings = filterSuppressed(findings, collectSuppressions(lines))
	sort.SliceStable(findings, func(a, b int) bool {
		x, y := findings[a], findings[b]
		if x.Line == y.Line {
			return x.Rule < y.Rule
		}
		return x.Line < y.Line
	})
	return findings
}
// splitLines breaks body into its lines so that lines[n-1] is the
// content of 1-based line n. A single trailing '\n' is dropped before
// splitting, so a newline-terminated file does not gain a phantom
// empty last line. Splitting happens on '\n' only: in a CRLF file each
// element keeps its trailing '\r'. An empty body yields nil.
func splitLines(body []byte) []string {
	if len(body) == 0 {
		return nil
	}
	text := string(body)
	if strings.HasSuffix(text, "\n") {
		text = text[:len(text)-1]
	}
	return strings.Split(text, "\n")
}
// suppressMarkerRE recognises a suppression comment of the form
// `// cpp-lint:allow(rule[, rule]) -- reason text`. The marker may
// sit at the end of the offending line OR on a comment-only line
// directly preceding it; both placements suppress the listed rules
// for the next code line. We support both styles because
// clang-format may wrap a long trailing comment, but a marker on
// its own line stays on its own line.
//
// The trailing reason is mandatory: a `nolint`-style suppression
// that does not say WHY is harder to audit later. A marker that
// omits the `-- reason` body silently fails to suppress.
//
// Capture group 1 is the raw comma-separated rule list and group 2 is
// the reason text. The `//` must start the line or follow whitespace,
// and the marker must run to the end of the line.
var suppressMarkerRE = regexp.MustCompile(`(?:^|\s)//\s*cpp-lint:allow\(([^)]*)\)\s*--\s*(.+?)\s*$`)

// suppression records the rule set that an inline marker disables
// on a particular source line.
type suppression struct {
	// Rules is the set of rule identifiers silenced on the line.
	Rules map[string]struct{}
}
// collectSuppressions scans every line for a suppression marker and
// returns a map from 1-based line number to the rules silenced there.
//
// A marker always covers its own line. When the marker sits on a
// comment-only line it also covers the next code line (the first later
// line that is neither blank nor a pure `//` comment). That second case
// handles clang-format wrapping a long trailing comment onto a line of
// its own above the statement it annotates.
func collectSuppressions(lines []string) map[int]suppression {
	byLine := make(map[int]suppression)
	for idx, text := range lines {
		match := suppressMarkerRE.FindStringSubmatch(text)
		if match == nil {
			continue
		}
		lineNo := idx + 1
		rules := parseSuppressionRules(match[1])
		mergeSuppression(byLine, lineNo, rules)
		if !isCommentOnlyLine(text) {
			continue
		}
		if target := nextCodeLine(lines, lineNo); target > 0 {
			mergeSuppression(byLine, target, rules)
		}
	}
	return byLine
}
func parseSuppressionRules(spec string) map[string]struct{} {
rules := map[string]struct{}{}
for r := range strings.SplitSeq(spec, ",") {
r = strings.TrimSpace(r)
if r == "" {
continue
}
rules[r] = struct{}{}
}
return rules
}
// mergeSuppression adds rules to the suppression recorded for line in
// dst, creating the entry when the line has none yet. Existing rules
// for the line are kept, so several markers can accumulate.
func mergeSuppression(dst map[int]suppression, line int, rules map[string]struct{}) {
	entry, exists := dst[line]
	if !exists {
		entry.Rules = make(map[string]struct{}, len(rules))
	}
	for name := range rules {
		entry.Rules[name] = struct{}{}
	}
	dst[line] = entry
}
// isCommentOnlyLine reports whether the line, ignoring surrounding
// whitespace, begins with `//`. Lines that open with a block comment
// (`/* ... */`) count as code on purpose, so a marker buried in a block
// comment does not leak onto the next statement.
func isCommentOnlyLine(line string) bool {
	body := strings.TrimSpace(line)
	return len(body) >= 2 && body[:2] == "//"
}
// nextCodeLine returns the 1-based number of the first code line that
// comes strictly after 1-based line `start`. Put differently, the scan
// begins at 0-based index `start`, which is how collectSuppressions
// calls it with the marker's own line number. A code line is any line
// that is neither blank nor a pure `//` comment. Returns 0 when no such
// line exists in the rest of the file.
func nextCodeLine(lines []string, start int) int {
	for idx := start; idx < len(lines); idx++ {
		text := lines[idx]
		blank := strings.TrimSpace(text) == ""
		if !blank && !isCommentOnlyLine(text) {
			return idx + 1
		}
	}
	return 0
}
// filterSuppressed drops every finding whose (line, rule) pair is
// silenced in sup and returns the rest in their original order. The
// filtering reuses the backing array of `in`, so the caller must not
// keep using the input slice afterwards. With no suppressions the
// input is returned untouched.
func filterSuppressed(in []Finding, sup map[int]suppression) []Finding {
	if len(sup) == 0 {
		return in
	}
	kept := in[:0]
	for _, finding := range in {
		// A missing line yields a zero suppression whose nil rule
		// set reports every rule as absent.
		_, muted := sup[finding.Line].Rules[finding.Rule]
		if !muted {
			kept = append(kept, finding)
		}
	}
	return kept
}
// --- file-length --------------------------------------------------------

const (
	// ruleFileLength is the rule identifier attached to the finding
	// checkFileLength emits for an over-long file.
	ruleFileLength = "file-length"
)
// checkFileLength enforces the whole-file line cap and returns at most
// one file-level finding (Line and Col left at zero). The plan picks
// 500 lines for first-party `.cc`/`.h` to mirror the Go `revive`
// `file-length-limit` rule already enabled in `.golangci.yml`. A cap of
// zero or less disables the rule.
func checkFileLength(path string, body []byte, opts CheckOptions) []Finding {
	limit := opts.MaxFileLines
	if limit <= 0 {
		return nil
	}
	total := countLines(body)
	if total <= limit {
		return nil
	}
	msg := fmt.Sprintf("file has %d lines (max %d); split the file", total, limit)
	return []Finding{{Rule: ruleFileLength, Path: path, Message: msg}}
}
// countLines returns the number of lines in body: one per '\n' byte,
// plus one more when the file ends in a partial line with no trailing
// newline. For newline-terminated files this equals `wc -l`; for files
// without a final newline it is one higher, because `wc -l` counts
// newline bytes only and omits the last line. An empty body has zero
// lines.
func countLines(body []byte) int {
	n := bytes.Count(body, []byte{'\n'})
	if len(body) > 0 && body[len(body)-1] != '\n' {
		n++
	}
	return n
}
// --- banned-logging ----------------------------------------------------

// ruleBannedLogging is the rule identifier attached to every finding
// produced by checkBannedLogging.
const ruleBannedLogging = "banned-logging"

// bannedLoggingPatterns is the (rule-message-friendly) list of
// production logging APIs we explicitly do not allow inside
// production C++. Tests, fixture printers, and the smoke binaries
// under `tools/googlesql-prebuilt/smoke/` may use these; the
// runner skips the rule for those paths via isLoggingAllowed().
//
// Each entry is a literal needle that checkBannedLogging searches for
// in the comment- and string-stripped line. The patterns are kept
// simple — a regex with capture groups would be overkill for a list
// this small.
//
// NOTE(review): the needles themselves carry no boundary information.
// Whether a match is anchored to a non-identifier boundary is decided
// by the search in checkBannedLogging — confirm it there.
var bannedLoggingPatterns = []struct {
	// needle is the literal text to look for.
	needle string
	// message becomes Finding.Message when the needle is found.
	message string
}{
	{
		"std::cout",
		"std::cout is banned in production C++; use absl::Status / structured logging via the gRPC error envelope",
	},
	{"std::cerr", "std::cerr is banned in production C++; surface errors through absl::Status / grpc::Status instead"},
	{"std::clog", "std::clog is banned in production C++; route diagnostics through absl::Status / grpc::Status"},
	{
		"std::printf",
		"std::printf is banned in production C++; use absl::StrCat / absl::StrFormat and return errors via Status",
	},
	{
		"std::fprintf",
		"std::fprintf is banned in production C++; surface diagnostics through absl::Status / grpc::Status",
	},
}
// printfWordRE matches a `printf(` or `fprintf(` call that starts at a
// word boundary, allowing whitespace between the name and the `(`.
// This catches the common `<cstdio>` / `<stdio.h>` variants.
//
// RE2 has no look-behind, so the regex alone cannot reject a match
// preceded by `::`, `.` or `->`. checkBannedLogging applies that
// filter by inspecting the byte just before each match, which keeps
// `std::printf(...)` (covered by its own needle) and member calls such
// as `obj.printf(...)` from being reported here.
var printfWordRE = regexp.MustCompile(`\b(f?printf)\s*\(`)
// checkBannedLogging reports uses of the banned logging APIs in a
// production C++ file. Files exempted by isLoggingAllowed produce no
// findings. Each line is scanned after comments and string literals
// have been blanked out, so column numbers still match the file.
//
// Two kinds of match are reported, each with a 1-based column:
//   - the `std::`-qualified needles from bannedLoggingPatterns, at most
//     one finding per needle per line, and only when the needle stands
//     at identifier boundaries (so `mystd::cout` is not mistaken for
//     `std::cout`);
//   - unqualified `printf(` / `fprintf(` calls, one finding per call.
func checkBannedLogging(path string, lines []string) []Finding {
	if isLoggingAllowed(path) {
		return nil
	}
	var out []Finding
	for i, raw := range lines {
		lineNo := i + 1
		stripped := stripCommentsAndStrings(raw)
		if stripped == "" {
			continue
		}
		for _, p := range bannedLoggingPatterns {
			if idx := indexAtIdentBoundary(stripped, p.needle); idx >= 0 {
				out = append(out, Finding{
					Rule:    ruleBannedLogging,
					Path:    path,
					Line:    lineNo,
					Col:     idx + 1,
					Message: p.message,
				})
			}
		}
		// printf / fprintf without an `std::` prefix. Skip the
		// match when it is preceded by `::` (already covered by
		// the std::printf needle), `.` / `->` (member call), or
		// an identifier character (avoids false positives on
		// `kSomePrefixprintf`-style symbols).
		for _, m := range printfWordRE.FindAllStringIndex(stripped, -1) {
			start := m[0]
			if start > 0 {
				prev := stripped[start-1]
				if isIdentChar(prev) || prev == '.' || prev == '>' || prev == ':' {
					continue
				}
			}
			out = append(out, Finding{
				Rule:    ruleBannedLogging,
				Path:    path,
				Line:    lineNo,
				Col:     start + 1,
				Message: "printf / fprintf is banned in production C++; surface errors through absl::Status",
			})
		}
	}
	return out
}

// indexAtIdentBoundary returns the byte offset of the first occurrence
// of needle in s that is neither preceded nor followed by an identifier
// character, or -1 when there is none. Occurrences embedded in a longer
// identifier are skipped and the search continues after them.
func indexAtIdentBoundary(s, needle string) int {
	for from := 0; from < len(s); {
		rel := strings.Index(s[from:], needle)
		if rel < 0 {
			return -1
		}
		start := from + rel
		end := start + len(needle)
		leftOK := start == 0 || !isIdentChar(s[start-1])
		rightOK := end >= len(s) || !isIdentChar(s[end])
		if leftOK && rightOK {
			return start
		}
		from = start + 1
	}
	return -1
}
// isLoggingAllowed reports whether the banned-logging rule is waived
// for path. Three cases are exempt: C++ test files (per IsTestFile),
// anything under `tools/googlesql-prebuilt/smoke/`, and the single
// path named by SentinelEmulatorMain (the emulator's `main.cc`, which
// legitimately writes `--help` / `--version` output at process start).
//
// The exemptions are deliberately narrow: the `main.cc` carve-out is
// an exact path comparison so a future `binaries/foo/main.cc` has to
// opt in explicitly, while the smoke directory matches by prefix
// because every file under it is non-production.
func isLoggingAllowed(path string) bool {
	switch {
	case IsTestFile(path):
		return true
	case strings.HasPrefix(path, "tools/googlesql-prebuilt/smoke/"):
		return true
	default:
		return path == SentinelEmulatorMain
	}
}
// stripCommentsAndStrings returns `line` with `//` comments and
// double-quoted strings replaced by spaces of the same length.
// This is a deliberately small lexer — it does not follow `/*
// */` block comments across newlines or recognise raw string
// literals — but it is sufficient to keep `// std::cerr is bad`
// comments and `"std::cout"` literals from raising findings while
// still catching real violations on any normal source line.
//
// The character literals `'"'` and `'\"'` are blanked as a unit.
// Without that, the quote inside them would be mistaken for the start
// of a string and everything after it on the line would be hidden from
// the checks. Other apostrophes (including C++14 digit separators such
// as `1'000`) are copied through unchanged.
//
// We pad with spaces (rather than truncate) so column numbers in
// findings still match the on-disk file; the result always has the
// same byte length as the input. Block comments and raw strings are
// flagged as a known-limitation in the package README; a real lexer
// would be overkill for a check that already runs in well under a
// second.
func stripCommentsAndStrings(line string) string {
	var out strings.Builder
	out.Grow(len(line))
	inString := false
	escape := false
	for i := 0; i < len(line); i++ {
		c := line[i]
		if inString {
			out.WriteByte(' ')
			switch {
			case escape:
				// The escaped byte never terminates the string.
				escape = false
			case c == '\\':
				escape = true
			case c == '"':
				inString = false
			}
			continue
		}
		switch {
		case c == '"':
			inString = true
			out.WriteByte(' ')
		case c == '\'':
			if n := quoteCharLiteralLen(line[i:]); n > 0 {
				out.WriteString(strings.Repeat(" ", n))
				i += n - 1
			} else {
				out.WriteByte(c)
			}
		case c == '/' && i+1 < len(line) && line[i+1] == '/':
			// Pad the rest of the line so column numbers stay
			// aligned with the original buffer; this also ends
			// the outer loop because i reaches len(line).
			for ; i < len(line); i++ {
				out.WriteByte(' ')
			}
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}

// quoteCharLiteralLen returns the byte length of a character literal
// holding a double quote (`'"'` or `'\"'`) when s starts with one, and
// 0 otherwise.
func quoteCharLiteralLen(s string) int {
	switch {
	case strings.HasPrefix(s, `'"'`):
		return 3
	case strings.HasPrefix(s, `'\"'`):
		return 4
	default:
		return 0
	}
}
// isIdentChar reports whether b can appear inside a C++ identifier:
// an ASCII letter, an ASCII digit, or an underscore.
func isIdentChar(b byte) bool {
	switch {
	case b == '_':
		return true
	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
		return true
	}
	return false
}
// --- status-anti-patterns ---------------------------------------------

const (
	// ruleStatusDiscarded is the rule identifier for a call whose
	// status result is dropped (see scanDiscardedStatus).
	ruleStatusDiscarded = "status-discarded"
	// ruleStatusOrUnchecked is the rule identifier for a `.value()`
	// call with no status guard nearby (see scanStatusOrValue).
	ruleStatusOrUnchecked = "statusor-unchecked-value"
)
// statusCallStmtRE matches a line that begins (after optional
// indentation) with a call expression: any number of `ns::`
// qualifiers, an optional `receiver.` prefix, then the function name
// followed by `(`. Capture group 3 is the bare function name.
//
// The pattern is intentionally simple: scanDiscardedStatus reports a
// line only when it also ends in `;` AND the captured name is listed
// in statusDiscardedFunctions. Real
// `absl::Status s = f(); if (!s.ok()) ...` flows are not flagged
// because the line starts with the declaration rather than with the
// call itself.
//
// Known gap: the receiver prefix only accepts `.`, so pointer calls
// such as `engine->ExecuteDdl(...)` do not match.
//
// We avoid trying to be a parser — clang-tidy's
// `bugprone-unused-return-value` is the long-term mechanism for
// this — but the regex catches the obvious case where someone
// types `engine.ExecuteDdl(...)` and forgets to inspect the
// result.
var statusCallStmtRE = regexp.MustCompile(
	`^\s*([A-Za-z_][A-Za-z_0-9]*::)*([A-Za-z_][A-Za-z_0-9]*\s*\.\s*)?([A-Za-z_][A-Za-z_0-9]*)\(`,
)
// statusOrValueRE matches any literal `.value()` invocation. The regex
// itself knows nothing about guards: scanStatusOrValue combines it
// with statusOrOkRE and hasNearbyStatusGuard to decide whether the
// call is unguarded. The check runs per-line so it is necessarily
// approximate; the value of catching even the obvious cases outweighs
// the false-positive risk because the suggested replacement (`*r`
// after a `.ok()` check, or `r.value_or(default)` with an explicit
// fallback) is almost always cleaner.
var statusOrValueRE = regexp.MustCompile(`\.value\(\)`)

// statusOrOkRE detects an inline `.ok()` check on the same source
// line as the `.value()` call. When present, we skip the
// `.value()` finding because the code is already guarding the
// dereference.
var statusOrOkRE = regexp.MustCompile(`\.ok\(\)`)
// checkStatusAntiPatterns reports two concrete failure modes in
// non-test files:
//   - `RunSql(...);`-style statements that drop an `absl::Status`
//     (see scanDiscardedStatus);
//   - `result.value()` with no `.ok()` / status guard on the same line
//     or shortly before it (see scanStatusOrValue), i.e. a `StatusOr`
//     access that crashes on an absent value.
//
// Both rules are deliberately conservative — clang-tidy's
// `bugprone-unused-return-value` and
// `bugprone-unchecked-optional-access` are the long-term mechanism.
// They exist so `task lint:run` can still catch the most common review
// nits without spinning up the full compile-aware lane. Each line is
// scanned with comments and string literals blanked out.
func checkStatusAntiPatterns(path string, lines []string) []Finding {
	if IsTestFile(path) {
		return nil
	}
	var findings []Finding
	for idx := range lines {
		code := stripCommentsAndStrings(lines[idx])
		if code == "" {
			continue
		}
		lineNo := idx + 1
		findings = append(findings, scanDiscardedStatus(path, lineNo, code)...)
		findings = append(findings, scanStatusOrValue(path, lineNo, code, lines)...)
	}
	return findings
}
// statusDiscardedFunctions lists functions whose return value
// must always be inspected. The list is small on purpose: this
// helper exists for repo-specific rules that clang-tidy's
// generic `[[nodiscard]]` plumbing cannot model without seeing
// the headers. Once `[[nodiscard]]` annotations land on the
// matching declarations, clang-tidy will surface the same
// findings during `task lint:cpp:tidy`.
//
// Keys are bare function names. scanDiscardedStatus looks up only the
// final identifier of the call, so the receiver or namespace in front
// of it is not taken into account.
var statusDiscardedFunctions = map[string]struct{}{
	// Engine.ExecuteDdl returns absl::Status; ignoring it loses
	// the failure that the gateway needs to surface as a 4xx /
	// 5xx response.
	"ExecuteDdl": {},
	// Storage.AppendRows returns absl::Status; ignoring it
	// silently drops a streaming insert.
	"AppendRows": {},
	// Storage.OverwriteRows returns absl::Status.
	"OverwriteRows": {},
	// Storage.DropTable returns absl::Status.
	"DropTable": {},
}
// scanDiscardedStatus inspects one comment-stripped line and returns a
// single `status-discarded` finding when the line is a bare call
// statement (it starts with the call and ends in `;`) to one of the
// functions in statusDiscardedFunctions. Otherwise it returns nil. The
// finding is always reported at column 1.
func scanDiscardedStatus(path string, line int, stripped string) []Finding {
	// Cheap pre-filter: a discarded-status statement always ends in `;`.
	trimmed := strings.TrimSpace(stripped)
	if trimmed == "" || trimmed[len(trimmed)-1] != ';' {
		return nil
	}
	groups := statusCallStmtRE.FindStringSubmatch(stripped)
	if groups == nil {
		return nil
	}
	callee := groups[3]
	if _, watched := statusDiscardedFunctions[callee]; watched {
		return []Finding{{
			Rule:    ruleStatusDiscarded,
			Path:    path,
			Line:    line,
			Col:     1,
			Message: fmt.Sprintf("discarded absl::Status return from %q; capture and inspect via if (!s.ok()) { ... }", callee),
		}}
	}
	return nil
}
// statusOrLookbackLines is the size of the look-back window, counted
// in lines before a `.value()` call, that hasNearbyStatusGuard searches
// for a status guard on behalf of scanStatusOrValue.
// Five lines is enough to cover the canonical pattern:
//
//	absl::StatusOr<T> rendered = ...;
//	if (!rendered.ok()) return rendered.status();
//	*out = std::move(rendered).value();
//
// while still catching the obvious "fetch -> dereference" calls
// that lack any guard.
const statusOrLookbackLines = 5
// scanStatusOrValue inspects one comment-stripped line and returns a
// single `statusor-unchecked-value` finding when the line calls
// `.value()` with no `.ok()` on the same line and no status guard in
// the preceding look-back window. The column points at the first
// `.value()` on the line. `lines` is the whole file, needed for the
// look-back; `line` is 1-based.
func scanStatusOrValue(path string, line int, stripped string, lines []string) []Finding {
	loc := statusOrValueRE.FindStringIndex(stripped)
	switch {
	case loc == nil:
		return nil
	case statusOrOkRE.MatchString(stripped):
		return nil
	case hasNearbyStatusGuard(lines, line):
		return nil
	}
	return []Finding{{
		Rule:    ruleStatusOrUnchecked,
		Path:    path,
		Line:    line,
		Col:     loc[0] + 1,
		Message: "StatusOr<T>::value() without a nearby .ok() guard; check status before unwrapping (or annotate with `// cpp-lint:allow(statusor-unchecked-value) -- reason` if intentional)",
	}}
}
// statusGuardRE matches the patterns we treat as a `.value()`
// safety net:
//
//   - `if (!x.ok())` — the canonical guard. The regex only looks for
//     `!identifier.ok()`, so the surrounding `if` is not required and
//     a chained receiver such as `!a.b.ok()` is not recognised.
//   - `return x.status()` — the early-return inside a guard. Any
//     `.status()` call matches, with or without `return`.
//   - `RETURN_IF_ERROR(x)` — the absl macro pattern.
//   - `ASSIGN_OR_RETURN(...)` — likewise.
//
// We intentionally over-accept here. False negatives (a missed
// finding because of a permissive guard pattern) are acceptable;
// false positives (a screaming finding on already-safe code) are
// not, because they erode trust in the rule.
var statusGuardRE = regexp.MustCompile(
	`!\s*[A-Za-z_][A-Za-z_0-9]*\s*\.\s*ok\(\)|RETURN_IF_ERROR\s*\(|ASSIGN_OR_RETURN\s*\(|\.\s*status\(\)`,
)
// hasNearbyStatusGuard reports whether any of the statusOrLookbackLines
// non-blank lines preceding 1-based `line` contains a status guard
// (see statusGuardRE).
//
// Lines that are empty once comments and string literals are blanked
// out do not count against the window, so a blank line or explanatory
// comment between the guard and the `.value()` call does not push the
// guard out of reach. The scan starts at the line directly above
// `line` and is clamped to the file, so an out-of-range `line` cannot
// index past the end of `lines`.
func hasNearbyStatusGuard(lines []string, line int) bool {
	inspected := 0
	for n := min(line-1, len(lines)); n >= 1 && inspected < statusOrLookbackLines; n-- {
		stripped := stripCommentsAndStrings(lines[n-1])
		if strings.TrimSpace(stripped) == "" {
			// Blank or comment-only: skip without using up the window.
			continue
		}
		inspected++
		if statusGuardRE.MatchString(stripped) {
			return true
		}
	}
	return false
}
// runCheck implements the `cpp-lint check` subcommand. It loads the
// canonical first-party source list, runs every per-file rule on each
// file, prints the findings to stdout (one per line), and returns
// `errFindings` when at least one was reported. A summary count goes
// to stderr in that case.
//
// Flag-parse failures and stray positional arguments yield `errUsage`;
// a file that cannot be read aborts the run with a wrapped error.
//
// Flags:
//   - `--max-lines` overrides the file-length cap (default 500).
func runCheck(args []string, stdout, stderr io.Writer) error {
	var lineCap int
	fs := flagSet("check", stderr)
	fs.IntVar(&lineCap, "max-lines", 500, "fail when a first-party C++ file exceeds this many lines")
	if fs.Parse(args) != nil {
		return errUsage
	}
	if fs.NArg() > 0 {
		_, _ = fmt.Fprintln(stderr, "cpp-lint check: takes no positional arguments")
		return errUsage
	}

	files, root, err := readSources()
	if err != nil {
		return err
	}

	opts := CheckOptions{MaxFileLines: lineCap}
	reported := 0
	for _, rel := range files {
		body, readErr := readFile(resolveAgainstRoot(root, rel))
		if readErr != nil {
			return fmt.Errorf("read %s: %w", rel, readErr)
		}
		found := runOnce(rel, body, opts)
		for i := range found {
			_, _ = fmt.Fprintln(stdout, found[i].Format())
		}
		reported += len(found)
	}

	if reported == 0 {
		return nil
	}
	_, _ = fmt.Fprintf(stderr, "cpp-lint: %d finding(s)\n", reported)
	return errFindings
}
// Command cpp-lint is the BigQuery emulator's first-party C++
// source-only lint runner.
//
// It exists to enforce three classes of rule that clang-format and
// clang-tidy do not cover well in this repo:
//
// 1. The list of files we own (vs. vendored / generated / cached
// trees) is computed in exactly one place. Every C++ lint task
// consumes it via `cpp-lint list`, so a path that is not first
// party can never sneak into clang-format, clang-tidy, or
// cppcheck.
// 2. A whole-file line-count rule (default 500 lines) that
// clang-tidy's `readability-function-size` cannot express.
// 3. Repo-specific anti-patterns: banned production logging APIs
// (`std::cout` / `std::cerr` / `printf` outside tests and
// tools), and obvious `absl::Status` / `absl::StatusOr<T>`
// misuse such as a discarded `Status` return or `.value()` on a
// `StatusOr` without a status check first.
//
// The binary deliberately depends only on the standard library so a
// fresh checkout can run `go run ./tools/lint/cpp` (or the
// `task lint:cpp:source` wrapper) without bootstrapping anything
// extra. Subcommands and flags follow the same shape as
// `tools/coverage` so contributors recognise the layout.
package main
import (
"errors"
"flag"
"fmt"
"io"
"os"
)
// Subcommand names. Centralised so the dispatch in run() and the
// table-driven tests stay aligned.
const (
	// cmdList selects runList.
	cmdList = "list"
	// cmdCheck selects runCheck.
	cmdCheck = "check"
	// cmdParseTidyLog selects runParseTidyLog.
	cmdParseTidyLog = "parse-tidy-log"
)
// run is the testable entry point: it dispatches args[0] to the
// matching subcommand and hands it the remaining arguments. It returns
// an error instead of calling os.Exit so tests can drive the full code
// path with table-driven fixtures without managing process lifetime.
//
// A missing or unknown subcommand prints the usage text to stderr and
// returns errUsage; an explicit help request prints it to stdout and
// succeeds.
func run(args []string, stdout, stderr io.Writer) error {
	if len(args) == 0 {
		usage(stderr)
		return errUsage
	}

	switch name, rest := args[0], args[1:]; name {
	case cmdList:
		return runList(rest, stdout, stderr)
	case cmdCheck:
		return runCheck(rest, stdout, stderr)
	case cmdParseTidyLog:
		return runParseTidyLog(rest, stdout, stderr)
	case "help", "-h", "--help":
		usage(stdout)
		return nil
	default:
		_, _ = fmt.Fprintf(stderr, "cpp-lint: unknown subcommand %q\n\n", name)
		usage(stderr)
		return errUsage
	}
}
// errUsage maps to exit code 2, matching the Go `flag` package's
// convention so wrappers can distinguish "you used it wrong" from a
// real lint failure (exit 1). main exits silently on it because the
// usage text has already been printed by the time it is returned.
var errUsage = errors.New("usage error")

// errFindings signals that the lint run completed cleanly but at
// least one rule reported a finding. main exits 1 on it without
// printing the error itself.
//
// NOTE(review): main also exits 1 for every other error, so rule
// violations and infrastructure failures differ only in what reaches
// stderr — confirm whether CI needs to tell them apart by exit code.
var errFindings = errors.New("lint findings")
// usage writes the top-level help text (tool summary, the subcommand
// list, and a pointer to per-subcommand `-h`) to w. Write errors are
// deliberately ignored: there is nowhere better to report them.
func usage(w io.Writer) {
	_, _ = fmt.Fprint(w, `cpp-lint - first-party C++ source-only lint runner.
Subcommands:
list Print the canonical first-party C++ source list (one path per line).
check Run the source-only checks (file size, banned logging, status misuse).
parse-tidy-log Parse lint-cpp-tidy.log into CSV + triage markdown.
Run "cpp-lint <subcommand> -h" for per-subcommand flags.
`)
}
// flagSet returns a FlagSet named after the subcommand that reports
// parse problems by returning an error (flag.ContinueOnError) rather
// than exiting the process, and that prints its diagnostics and usage
// to the supplied stderr writer. Building every subcommand's FlagSet
// here keeps their behaviour uniform and keeps tests from being killed
// by the stdlib's default ExitOnError handling.
func flagSet(name string, stderr io.Writer) *flag.FlagSet {
	set := flag.NewFlagSet(name, flag.ContinueOnError)
	set.SetOutput(stderr)
	return set
}
// main runs the CLI and maps the outcome to a process exit code:
// 0 on success, 2 for usage errors, and 1 for lint findings or any
// other failure. Only unexpected failures are echoed to stderr here;
// usage errors and findings have already been reported by run.
func main() {
	err := run(os.Args[1:], os.Stdout, os.Stderr)
	if err == nil {
		return
	}
	if errors.Is(err, errUsage) {
		os.Exit(2)
	}
	if errors.Is(err, errFindings) {
		os.Exit(1)
	}
	_, _ = fmt.Fprintf(os.Stderr, "cpp-lint: %v\n", err)
	os.Exit(1)
}
package main
import (
"bufio"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
)
// firstPartyIncludeRoots names the only directories that contain
// hand-written, repo-owned C++ that the lint stack is allowed to
// touch. Every other root is either vendored upstream code,
// generated output, a Bazel cache, or build / ops artifacts that
// must not be reformatted by us.
//
// The list is intentionally short and explicit: adding a new
// first-party C++ tree should be a deliberate edit here, not a side
// effect of someone dropping a `.cc` file into an unrelated
// directory.
//
// Entries are matched as plain string prefixes of the repo-relative
// path (see underAnyPrefix), so each one must keep its trailing `/`.
var firstPartyIncludeRoots = []string{
	"backend/",
	"binaries/",
	"frontend/",
	"tools/googlesql-prebuilt/smoke/",
}
// firstPartyExcludePrefixes lists path prefixes that look first
// party at a glance but are either generated or wrappers around
// upstream code. They are excluded after the include filter so a
// future first-party file under one of the parent directories does
// not accidentally inherit the exclusion.
//
//   - `tools/googlesql-prebuilt/templates/` is `cc_library` glue we
//     stamp into the prebuilt artifact's BUILD file; the C++ headers
//     under it shadow GoogleSQL surface types and follow upstream
//     style.
//
// Entries are matched as plain string prefixes (see underAnyPrefix),
// so each one must keep its trailing `/`.
var firstPartyExcludePrefixes = []string{
	"tools/googlesql-prebuilt/templates/",
}

// firstPartyExcludePaths lists individual files that match the
// include filter but must never be linted. Generated artifacts and
// Bazel-stamped outputs go here. Keys are compared against the full
// repo-relative path.
//
//   - `binaries/emulator_main/version.cc` is genrule output that
//     `version_gen.sh` writes from `version.cc.tmpl`. The template
//     itself is not excluded because we hand-write it.
var firstPartyExcludePaths = map[string]struct{}{
	// The genrule output for `binaries/emulator_main:version_cc`.
	// Bazel may stage it under `bazel-out/`, but a stray symlink
	// inside the worktree (or a `bazel run` artifact) must not
	// pull it into the lint set.
	"binaries/emulator_main/version.cc": {},
}
// firstPartyExtensions lists the file extensions we treat as C++
// sources for lint purposes. Headers and source files are listed
// together because clang-format, clang-tidy, and cppcheck all
// expect both to share a single configuration set, and the
// source-only checks (file size, banned logging) apply uniformly.
// Each entry includes the leading dot and is compared as a
// case-sensitive suffix by hasCPPExtension.
var firstPartyExtensions = []string{".cc", ".cpp", ".cxx", ".h", ".hpp", ".hh"}
// SourceLister returns the set of tracked C++ files we own. The
// real implementation shells out to `git ls-files`; tests provide a
// fixture-driven stub via the same interface so they do not need a
// live git repo.
type SourceLister interface {
	// List returns the first-party C++ paths, or an error when the
	// underlying listing could not be obtained.
	List() ([]string, error)
}

// gitSourceLister is the production SourceLister. It uses
// `git ls-files` so that .gitignore and the working tree state
// determine the answer — not a brittle filesystem walk that would
// pick up Bazel symlinks (`bazel-*`), the `.cache/` tree, or
// deleted-but-still-on-disk files.
type gitSourceLister struct {
	// repoRoot is passed to `git -C` so the lister works from any
	// subdirectory and from inside test temp dirs.
	repoRoot string
}
// newGitSourceLister builds a gitSourceLister rooted at the git repo
// that encloses the current working directory. The repo is located
// eagerly, so callers fail fast (with the repoRoot error) when run
// outside a checkout.
func newGitSourceLister() (*gitSourceLister, error) {
	lister := new(gitSourceLister)
	var err error
	if lister.repoRoot, err = repoRoot(); err != nil {
		return nil, err
	}
	return lister, nil
}
// List returns the first-party C++ files tracked in the repo. It asks
// git for every tracked path and passes the result through
// filterFirstParty, which applies the ownership rules and sorts the
// survivors, so downstream tools see a deterministic order even if
// git's own ordering ever changes. A git failure is returned as-is.
func (g *gitSourceLister) List() ([]string, error) {
	tracked, err := gitLsFiles(g.repoRoot)
	if err != nil {
		return nil, err
	}
	owned := filterFirstParty(tracked)
	return owned, nil
}
// repoRoot returns the top-level directory of the enclosing git repo,
// as printed by `git rev-parse --show-toplevel` with surrounding
// whitespace trimmed. Asking git is preferred over walking up in search
// of a `.git` directory because that walk misbehaves inside git
// worktrees, where `.git` is a regular file. A failing git invocation
// is wrapped and returned.
func repoRoot() (string, error) {
	raw, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		return "", fmt.Errorf("git rev-parse: %w", err)
	}
	return strings.TrimSpace(string(raw)), nil
}
// gitLsFiles runs `git -C dir ls-files` and returns every tracked path
// it prints, one entry per non-blank output line with surrounding
// whitespace trimmed.
//
// No globs are passed on purpose: a `.cc` file under `third_party/` is
// still tracked, and the include / exclude lists in this file must stay
// the single source of truth for what "first party" means. Filtering on
// the git side would let a pattern bug silently include vendored code.
func gitLsFiles(dir string) ([]string, error) {
	// #nosec G204 -- 'git' is a fixed binary; dir is the lint tool's
	// repo root.
	listing, err := exec.Command("git", "-C", dir, "ls-files").Output()
	if err != nil {
		return nil, fmt.Errorf("git ls-files: %w", err)
	}

	sc := bufio.NewScanner(strings.NewReader(string(listing)))
	// Allow single lines of up to 1 MiB instead of the scanner default.
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

	var tracked []string
	for sc.Scan() {
		if entry := strings.TrimSpace(sc.Text()); entry != "" {
			tracked = append(tracked, entry)
		}
	}
	if scanErr := sc.Err(); scanErr != nil {
		return nil, fmt.Errorf("scan ls-files: %w", scanErr)
	}
	return tracked, nil
}
// filterFirstParty narrows a raw `git ls-files` listing down to the
// first-party C++ sources and returns them sorted. Tests pin this one
// function so the ownership boundary cannot drift silently.
//
// A path is kept when it has a C++ extension, lives under one of
// firstPartyIncludeRoots, is not under any firstPartyExcludePrefixes
// entry, and is not listed in firstPartyExcludePaths.
func filterFirstParty(all []string) []string {
	var kept []string
	for _, raw := range all {
		// `git ls-files` always emits `/`, but a backslash from a
		// hand-built fixture or a Windows host must still route
		// through the include filter. filepath.ToSlash is a no-op
		// on linux, hence the explicit replacement.
		candidate := strings.ReplaceAll(raw, `\`, "/")

		eligible := hasCPPExtension(candidate) &&
			underAnyPrefix(candidate, firstPartyIncludeRoots) &&
			!underAnyPrefix(candidate, firstPartyExcludePrefixes)
		if !eligible {
			continue
		}
		if _, dropped := firstPartyExcludePaths[candidate]; !dropped {
			kept = append(kept, candidate)
		}
	}
	sort.Strings(kept)
	return kept
}
// hasCPPExtension reports whether path ends in one of the
// firstPartyExtensions suffixes. The comparison is case-sensitive on
// purpose: first-party files in this repo use lowercase extensions,
// and clang-format / clang-tidy follow the same convention.
func hasCPPExtension(path string) bool {
	matched := false
	for _, suffix := range firstPartyExtensions {
		if matched = strings.HasSuffix(path, suffix); matched {
			break
		}
	}
	return matched
}
// underAnyPrefix reports whether path starts with at least one entry
// of prefixes. Callers are expected to pass directory prefixes ending
// in `/`, so that `backend/` does not also match a sibling directory
// such as `backend2/`.
func underAnyPrefix(path string, prefixes []string) bool {
	for i := range prefixes {
		if strings.HasPrefix(path, prefixes[i]) {
			return true
		}
	}
	return false
}
// IsTestFile reports whether path names a first-party C++ test, i.e.
// its base name ends in `_test.cc` or `_test.cpp`. The repo keeps
// tests next to the implementation file (googletest's recommended
// layout) and has no test-only headers today.
//
// Source-only rules that relax for tests (for example allowing
// `std::cout` in fixture printers) should call this helper instead of
// hard-coding a path list, so a test added in a new directory is
// classified correctly without another edit here.
func IsTestFile(path string) bool {
	name := filepath.Base(path)
	for _, suffix := range [...]string{"_test.cc", "_test.cpp"} {
		if strings.HasSuffix(name, suffix) {
			return true
		}
	}
	return false
}
// IsClangTidyTranslationUnit reports whether path ends in `.cc`,
// `.cpp` or `.cxx`, i.e. is a source file clang-tidy should analyze
// as a standalone translation unit.
//
// Headers are left out deliberately: `compile_commands.json` lists
// only source files, so linting a header synthesizes a TU without
// Bazel's include paths and yields bogus `clang-diagnostic-error`
// findings (notably on `*_test_fixture.h` and other header-only
// helpers).
func IsClangTidyTranslationUnit(path string) bool {
	switch {
	case strings.HasSuffix(path, ".cc"),
		strings.HasSuffix(path, ".cpp"),
		strings.HasSuffix(path, ".cxx"):
		return true
	default:
		return false
	}
}
// runList implements the `cpp-lint list` subcommand: it prints the
// first-party C++ source list to stdout, one repo-relative path per
// line, ready to pipe into `xargs clang-format`, `xargs clang-tidy`
// or a similar tool.
//
// Flags:
//
//	-tests  include *_test.cc files (default true)
//	-tidy   emit only .cc/.cpp/.cxx translation units
//
// A flag parse failure or any positional argument yields errUsage.
// Paths git tracks but that are missing on disk are dropped before
// printing.
func runList(args []string, stdout, stderr io.Writer) error {
	fs := flagSet("list", stderr)
	withTests := fs.Bool("tests", true, "include *_test.cc files in the output")
	tidyOnly := fs.Bool("tidy", false, "emit only .cc/.cpp/.cxx translation units (for clang-tidy)")
	if parseErr := fs.Parse(args); parseErr != nil {
		return errUsage
	}
	if fs.NArg() > 0 {
		_, _ = fmt.Fprintln(stderr, "cpp-lint list: takes no positional arguments")
		return errUsage
	}

	lister, err := newGitSourceLister()
	if err != nil {
		return err
	}
	tracked, err := lister.List()
	if err != nil {
		return err
	}
	root, err := repoRoot()
	if err != nil {
		return err
	}

	for _, path := range filterExistingOnDisk(root, tracked) {
		skip := (!*withTests && IsTestFile(path)) ||
			(*tidyOnly && !IsClangTidyTranslationUnit(path))
		if skip {
			continue
		}
		_, _ = fmt.Fprintln(stdout, path)
	}
	return nil
}
// readSources returns the first-party source list together with the
// repo root that was used to produce it. Subcommands call this helper
// instead of building a lister themselves so the test suite has one
// seam for swapping in fixtures.
//
// The root comes from currentRepoRoot() when a test has pinned one
// (see setRepoRoot); otherwise it is discovered with repoRoot().
// Returning the root lets callers resolve the relative paths against
// the same directory the lister used. Entries that are tracked but
// absent from disk are filtered out.
func readSources() ([]string, string, error) {
	root := currentRepoRoot()
	if root == "" {
		var err error
		if root, err = repoRoot(); err != nil {
			return nil, "", err
		}
	}

	tracked, err := (&gitSourceLister{repoRoot: root}).List()
	if err != nil {
		return nil, "", err
	}
	return filterExistingOnDisk(root, tracked), root, nil
}
// filterExistingOnDisk keeps only the entries of files that can be
// stat'ed under root, preserving their order. This drops git-tracked
// paths that are gone from disk (for example after a split where the
// old monoliths were deleted but `git rm` has not landed yet);
// downstream tools such as clang-format would fail on them anyway.
// Any stat error, not just "not found", causes the entry to be dropped.
func filterExistingOnDisk(root string, files []string) []string {
	present := make([]string, 0, len(files))
	for _, rel := range files {
		_, statErr := os.Stat(resolveAgainstRoot(root, rel))
		if statErr != nil {
			continue
		}
		present = append(present, rel)
	}
	return present
}
// testRepoRoot is a test seam. Tests pin it via setRepoRoot so the
// lister runs against a fixture worktree without exporting the
// internal type. It is read through currentRepoRoot(); readSources()
// falls back to discovering the root with repoRoot() while it is
// empty, so production code paths stay unaffected.
var testRepoRoot string //nolint:gochecknoglobals // test seam, see setRepoRoot

// currentRepoRoot returns the pinned test repo root, or "" when no
// test has overridden it.
func currentRepoRoot() string { return testRepoRoot }
// setRepoRoot pins the test-only repo root to dir and returns a
// function that restores whatever value was set before the call
// (the empty string in practice). Typical use in a test is
// `defer setRepoRoot(dir)()`.
func setRepoRoot(dir string) func() {
	saved := testRepoRoot
	restore := func() { testRepoRoot = saved }
	testRepoRoot = dir
	return restore
}
// resolveAgainstRoot turns a slash-separated, repo-relative path into
// an OS-native path under root. The source lister is expected to hand
// out repo-relative paths only, because every downstream consumer
// prints them as such; the root is spliced back in just for the
// moments a check has to open the file on disk.
func resolveAgainstRoot(root, rel string) string {
	native := filepath.FromSlash(rel)
	return filepath.Join(root, native)
}
// readFile loads the whole file at path into memory. It exists so the
// check helpers share one read call instead of each growing its own
// boilerplate, and so a later switch to memory-mapped reads has a
// single place to change.
func readFile(path string) ([]byte, error) {
	//nolint:gosec // Paths come from the first-party source lister, which is itself tested.
	data, err := os.ReadFile(path)
	return data, err
}
package main
import (
"encoding/csv"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// tidyFinding is one clang-tidy diagnostic extracted from a batch log.
// Values are produced by findingsForFailure and written out one per
// row by writeTidyCSV.
type tidyFinding struct {
// BlockFile is the path from the `FAILED:` line the diagnostic was
// attributed to.
BlockFile string // first-party file block being linted
// File is the diagnostic's own path after normalizeTidyPath; external
// diagnostics may be re-pointed at the failed file.
File string // diagnostic path (normalized when possible)
// Line and Column are the position parsed from the diagnostic line.
Line int
Column int
// Severity is "warning" or "error"; "note" lines are never recorded.
Severity string
// Check is the first check name listed in the trailing [...] bracket.
Check string
// Symbol is the function name from a cognitive-complexity message,
// empty for every other kind of finding.
Symbol string
// Message is the diagnostic text without the trailing [check] bracket.
Message string
ComplexityScore int // 0 when not a complexity finding
}
// Shared labels used in the triage output.
const (
// fileKindTest and fileKindFixture are two of the values fileKind
// returns; dispositionFor compares against them.
fileKindTest = "test"
fileKindFixture = "fixture"
// dispFix is the disposition dispositionFor returns for rows that
// need a code change.
dispFix = "fix"
)
// Line patterns recognized in the clang-tidy batch log. All are
// compiled once at package initialization.
var (
// reFileBlock matches a `========== <path> ==========` header line;
// parseTidyLog counts these as scanned file blocks.
reFileBlock = regexp.MustCompile(`^========== (.+) ==========$`)
// reFailed matches a `FAILED: <path>` line; group 1 is the failed path.
reFailed = regexp.MustCompile(`^FAILED: (.+)$`)
// reProcessing matches `Error while processing <path>.`; group 1 is
// the path without the trailing period.
reProcessing = regexp.MustCompile(`^Error while processing (.+)\.$`)
// path:line:col: severity: message [check-name,...]
// Groups: 1 path, 2 line, 3 column, 4 severity, 5 message,
// 6 bracket contents (empty when the bracket is absent).
reDiagnostic = regexp.MustCompile(`^(.+?):(\d+):(\d+): (warning|error|note): (.+?)(?: \[(.+?)\])?$`)
// reComplexity pulls the function name (group 1) and score (group 2)
// out of a cognitive-complexity message.
reComplexity = regexp.MustCompile(`function '([^']+)' has cognitive complexity of (\d+)`)
)
// runParseTidyLog implements the subcommand that turns a clang-tidy
// batch log into a CSV of findings plus a triage markdown document,
// then prints a short summary to stdout.
//
// Flags:
//
//	-log       clang-tidy batch log to read
//	-csv       CSV output path
//	-markdown  triage markdown output path
//
// A flag parse failure yields errUsage; failures to open the log or to
// write either output are returned as-is.
func runParseTidyLog(args []string, stdout, stderr io.Writer) error {
	fs := flagSet(cmdParseTidyLog, stderr)
	var (
		logPath = fs.String("log", "lint-cpp-tidy.log", "clang-tidy batch log path")
		csvPath = fs.String("csv", "lint-cpp-tidy.csv", "CSV output path")
		mdPath  = fs.String("markdown", "docs/dev/cpp-lint-tidy-triage.md", "triage markdown output path")
	)
	if fs.Parse(args) != nil {
		return errUsage
	}

	logFile, err := os.Open(*logPath)
	if err != nil {
		return fmt.Errorf("open log: %w", err)
	}
	// The log is only read, so a close error carries no information.
	defer func() { _ = logFile.Close() }()

	findings, failedFiles, totalBlocks := parseTidyLog(logFile)
	if err = writeTidyCSV(*csvPath, findings); err != nil {
		return err
	}
	if err = writeTriageMarkdown(*mdPath, findings, failedFiles, totalBlocks); err != nil {
		return err
	}
	printTidySummary(stdout, findings, failedFiles, totalBlocks, *csvPath, *mdPath)
	return nil
}
// parseTidyLog reads a whole clang-tidy batch log from r and returns
// the extracted findings, the set of paths named on `FAILED:` lines,
// and the number of `========== <file> ==========` block headers.
//
// The log is cut into windows, each ending at a `FAILED:` line and
// starting right after the previous one; every window is handed to
// findingsForFailure together with the failed path. Lines after the
// last `FAILED:` line contribute no findings.
//
// NOTE(review): a read error is not reported; it yields nil, nil, 0,
// which callers cannot tell apart from an empty log.
func parseTidyLog(r io.Reader) ([]tidyFinding, map[string]struct{}, int) {
	raw, err := io.ReadAll(r)
	if err != nil {
		return nil, nil, 0
	}
	lines := strings.Split(string(raw), "\n")

	var (
		findings    []tidyFinding
		failedFiles = make(map[string]struct{})
		totalBlocks int
		windowStart int
	)
	for idx, line := range lines {
		// A block header can never also be a FAILED line, so it is
		// counted and skipped before the FAILED check.
		if reFileBlock.MatchString(line) {
			totalBlocks++
			continue
		}
		failed := reFailed.FindStringSubmatch(line)
		if failed == nil {
			continue
		}
		tu := strings.TrimSpace(failed[1])
		failedFiles[tu] = struct{}{}
		findings = append(findings, findingsForFailure(lines[windowStart:idx+1], tu)...)
		windowStart = idx + 1
	}
	return findings, failedFiles, totalBlocks
}
// findingsForFailure attributes diagnostics in a FAILED window onto the failed TU.
//
// window is the slice of log lines ending at the `FAILED: <failedPath>`
// line. Every warning/error diagnostic that carries a [check] bracket
// and can be tied to failedPath becomes one tidyFinding whose BlockFile
// is failedPath; the result is de-duplicated with dedupeFindings.
func findingsForFailure(window []string, failedPath string) []tidyFinding {
var out []tidyFinding
// processingTU tracks the translation unit named by the most recent
// "Error while processing" line; until one is seen it is failedPath.
processingTU := failedPath
// First pass: does any line of the window mention "<failedPath>:"?
// Used below to decide whether external diagnostics belong to it.
windowMentionsFailed := false
for _, line := range window {
if strings.Contains(line, failedPath+":") {
windowMentionsFailed = true
}
}
for _, line := range window {
if m := reProcessing.FindStringSubmatch(line); m != nil {
processingTU = normalizeTidyPath(m[1], failedPath)
}
m := reDiagnostic.FindStringSubmatch(line)
if m == nil {
continue
}
severity := m[4]
// Notes are skipped; only warnings and errors are recorded.
if severity == "note" {
continue
}
checkRaw := m[6]
// Diagnostics without a [check] bracket are skipped.
if checkRaw == "" {
continue
}
// Keep only the first name of a comma-separated check list.
check := strings.Split(checkRaw, ",")[0]
check = strings.TrimPrefix(check, "-warnings-as-errors")
rawPath := m[1]
file := normalizeTidyPath(rawPath, processingTU)
// Decide whether this diagnostic belongs to failedPath.
switch {
case file == failedPath, strings.HasPrefix(line, failedPath+":"):
// direct hit
case isExternalTidyPath(rawPath) && windowMentionsFailed:
// External path inside a window that also mentions the failed
// file: report it against the failed file.
file = failedPath
case processingTU == failedPath:
// e.g. missing-header errors while linting a header TU
default:
// Not attributable to failedPath; drop it.
continue
}
// reDiagnostic only captures digit runs here, so the Atoi errors
// are ignored.
lineNum, _ := strconv.Atoi(m[2])
colNum, _ := strconv.Atoi(m[3])
msg := m[5]
symbol := ""
complexity := 0
// Cognitive-complexity messages also yield the function name and
// its score.
if cm := reComplexity.FindStringSubmatch(msg); cm != nil {
symbol = cm[1]
complexity, _ = strconv.Atoi(cm[2])
}
out = append(out, tidyFinding{
BlockFile: failedPath,
File: file,
Line: lineNum,
Column: colNum,
Severity: severity,
Check: check,
Symbol: symbol,
Message: msg,
ComplexityScore: complexity,
})
}
return dedupeFindings(out)
}
// isExternalTidyPath reports whether a raw diagnostic path points
// outside first-party code: it starts with `bazel-out/` or
// `external/`, or contains `/external/googlesql` anywhere.
func isExternalTidyPath(raw string) bool {
	for _, prefix := range [...]string{"bazel-out/", "external/"} {
		if strings.HasPrefix(raw, prefix) {
			return true
		}
	}
	return strings.Contains(raw, "/external/googlesql")
}
// dedupeFindings returns in with repeated findings removed, keeping
// the first occurrence and the original order. Two findings count as
// the same when file, line, column, check and message all agree;
// BlockFile, Severity and Symbol are not part of the key.
func dedupeFindings(in []tidyFinding) []tidyFinding {
	var unique []tidyFinding
	seen := make(map[string]struct{})
	for i := range in {
		f := in[i]
		key := fmt.Sprintf("%s:%d:%d:%s:%s", f.File, f.Line, f.Column, f.Check, f.Message)
		if _, dup := seen[key]; !dup {
			seen[key] = struct{}{}
			unique = append(unique, f)
		}
	}
	return unique
}
// normalizeTidyPath maps a clang-tidy diagnostic path onto a
// first-party, repo-relative path where it can.
//
// After trimming whitespace and a leading `./`:
//   - an absolute path containing `/backend/` is cut to start at
//     `backend/`;
//   - any other absolute path containing `bigquery-emulator/` is cut
//     to what follows that marker;
//   - remaining absolute paths, and relative paths under `external/`
//     or `bazel-out/`, are attributed to blockFile so triage stays one
//     row per `FAILED:` entry;
//   - everything else is returned as cleaned.
func normalizeTidyPath(raw, blockFile string) string {
	cleaned := strings.TrimPrefix(strings.TrimSpace(raw), "./")
	switch {
	case strings.HasPrefix(cleaned, "/"):
		if _, tail, found := strings.Cut(cleaned, "/backend/"); found {
			return "backend/" + tail
		}
		if _, tail, found := strings.Cut(cleaned, "bigquery-emulator/"); found {
			return tail
		}
		return blockFile
	case strings.HasPrefix(cleaned, "external/"), strings.HasPrefix(cleaned, "bazel-out/"):
		return blockFile
	default:
		return cleaned
	}
}
// writeTidyCSV writes findings to a CSV file at path, creating the
// parent directory when needed. The first row is a header; each
// finding follows as one row in the same column order.
//
// Every failure is returned with context: creating the directory or
// the file, writing a row, flushing the CSV writer, and closing the
// file. The close error matters because the file was written to; it
// is reported only when no earlier error occurred.
func writeTidyCSV(path string, findings []tidyFinding) (err error) {
	// MkdirAll succeeds for directories that already exist, including
	// "." for a CSV at the repo root, so no special cases are needed.
	//nolint:gosec // Output path comes from task wrapper defaults or explicit flags.
	if mkErr := os.MkdirAll(filepath.Dir(path), 0o755); mkErr != nil {
		return fmt.Errorf("mkdir csv dir: %w", mkErr)
	}
	//nolint:gosec // Output path comes from task wrapper defaults or explicit flags.
	out, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("create csv: %w", err)
	}
	defer func() {
		if closeErr := out.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("close csv: %w", closeErr)
		}
	}()

	w := csv.NewWriter(out)
	header := []string{
		"block_file", "file", "line", "column", "severity", "check", "symbol", "message", "complexity_score",
	}
	if err = w.Write(header); err != nil {
		return fmt.Errorf("write csv header: %w", err)
	}
	for i := range findings {
		f := &findings[i]
		row := []string{
			f.BlockFile,
			f.File,
			strconv.Itoa(f.Line),
			strconv.Itoa(f.Column),
			f.Severity,
			f.Check,
			f.Symbol,
			f.Message,
			strconv.Itoa(f.ComplexityScore),
		}
		if err = w.Write(row); err != nil {
			return fmt.Errorf("write csv row: %w", err)
		}
	}
	// Rows are buffered; Flush pushes them to the file and Error
	// reports anything that went wrong while doing so.
	w.Flush()
	if err = w.Error(); err != nil {
		return fmt.Errorf("flush csv: %w", err)
	}
	return nil
}
package main
import (
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
)
// fileSummary aggregates the findings attributed to one failed file.
// summarizeByFile builds one per entry of the failed-file set.
type fileSummary struct {
// File is the failed file's path.
File string
// Findings holds the findings attributed to File, in input order.
Findings []tidyFinding
// WorstComplexity is the largest ComplexityScore among Findings
// (0 when there is none).
WorstComplexity int
// Checks counts Findings per check name.
Checks map[string]int
}
// failedFileForFinding picks the failed file a finding is reported
// under: the finding's own File when it is in failedFiles, otherwise
// its BlockFile when that is, otherwise "".
func failedFileForFinding(f tidyFinding, failedFiles map[string]struct{}) string {
	for _, candidate := range [...]string{f.File, f.BlockFile} {
		if _, failed := failedFiles[candidate]; failed {
			return candidate
		}
	}
	return ""
}
// summarizeByFile groups findings under the failed file each belongs
// to (see failedFileForFinding) and returns one summary per entry of
// failedFiles, sorted by path. Failed files without findings still
// get an empty summary; findings that match no failed file are
// ignored.
func summarizeByFile(findings []tidyFinding, failedFiles map[string]struct{}) []fileSummary {
	index := make(map[string]*fileSummary)
	for failed := range failedFiles {
		index[failed] = &fileSummary{File: failed, Checks: make(map[string]int)}
	}

	for _, finding := range findings {
		owner := failedFileForFinding(finding, failedFiles)
		if owner == "" {
			continue
		}
		// owner always comes from failedFiles, so the entry exists.
		entry := index[owner]
		entry.Checks[finding.Check]++
		entry.Findings = append(entry.Findings, finding)
		if entry.WorstComplexity < finding.ComplexityScore {
			entry.WorstComplexity = finding.ComplexityScore
		}
	}

	summaries := make([]fileSummary, 0, len(index))
	for _, entry := range index {
		summaries = append(summaries, *entry)
	}
	sort.Slice(summaries, func(a, b int) bool {
		return summaries[a].File < summaries[b].File
	})
	return summaries
}
// subsystemFor maps a repo-relative path to the subsystem label used
// in the triage tables, based on its directory prefix. Paths matching
// none of the known prefixes are labelled "other".
func subsystemFor(path string) string {
	routes := [...]struct{ prefix, label string }{
		{"backend/catalog/", "catalog"},
		{"backend/engine/control/", "control"},
		{"backend/engine/coordinator/", "coordinator"},
		{"backend/engine/duckdb/", "duckdb"},
		{"backend/engine/semantic/", "semantic"},
		{"backend/storage/", "storage"},
		{"backend/schema/", "schema"},
		{"backend/sqltools/", "sqltools"},
		{"frontend/handlers/", "frontend"},
		{"binaries/", "binaries"},
		{"tools/googlesql-prebuilt/smoke/", "smoke"},
	}
	for _, route := range routes {
		if strings.HasPrefix(path, route.prefix) {
			return route.label
		}
	}
	return "other"
}
// fileKind classifies a path by the suffix of its base name:
// `_test.cc` is a test, `_test_fixture.h` and `_internal.h` are
// fixtures, other `.h` / `.hpp` files are headers, and everything
// else is production code. The checks run in that order, so the
// fixture suffixes win over the plain header match.
func fileKind(path string) string {
	name := filepath.Base(path)
	endsWith := func(suffixes ...string) bool {
		for _, suffix := range suffixes {
			if strings.HasSuffix(name, suffix) {
				return true
			}
		}
		return false
	}

	if endsWith("_test.cc") {
		return fileKindTest
	}
	if endsWith("_test_fixture.h", "_internal.h") {
		return fileKindFixture
	}
	if endsWith(".h", ".hpp") {
		return "header"
	}
	return "production"
}
// dispositionFor decides how a failed file should be handled and in
// which remediation wave.
//
// When the file has any check other than the two complexity/size
// readability checks, it is "autofix" in wave 0 if a `modernize-`
// check is among them, otherwise "fix" in wave 3. Files with only
// complexity/size findings (or none) are ranked by worst complexity:
// above 50 is "fix" in wave 1, 35 and up is "fix" in wave 2, and
// anything lower lands in wave 4 as "NOLINT" for tests and fixtures
// or "fix" for everything else.
func dispositionFor(s fileSummary) (disp string, wave int) {
	onlyComplexity, hasModernize := true, false
	for check := range s.Checks {
		switch check {
		case "readability-function-cognitive-complexity", "readability-function-size":
			// complexity-only so far
		default:
			onlyComplexity = false
		}
		if strings.HasPrefix(check, "modernize-") {
			hasModernize = true
		}
	}
	if !onlyComplexity {
		if hasModernize {
			return "autofix", 0
		}
		return dispFix, 3
	}

	switch score := s.WorstComplexity; {
	case score > 50:
		return dispFix, 1
	case score >= 35:
		return dispFix, 2
	}

	// Below 35 the score no longer matters; only the file kind does.
	switch fileKind(s.File) {
	case fileKindTest, fileKindFixture:
		return "NOLINT", 4
	}
	return dispFix, 4
}
// writeTriageMarkdown renders the triage document with
// buildTriageMarkdown and writes it to path with mode 0o644, creating
// the parent directory first. A failure to create the directory is
// wrapped; a write failure is returned unchanged.
func writeTriageMarkdown(path string, findings []tidyFinding, failedFiles map[string]struct{}, totalBlocks int) error {
	dir := filepath.Dir(path)
	//nolint:gosec // Output path comes from task wrapper defaults or explicit flags.
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return fmt.Errorf("mkdir triage doc: %w", err)
	}
	doc := []byte(buildTriageMarkdown(findings, failedFiles, totalBlocks))
	//nolint:gosec // Generated triage doc is intentionally world-readable.
	return os.WriteFile(path, doc, 0o644)
}
// buildTriageMarkdown assembles the full triage document as a string:
// the summary section, the per-file disposition matrix, and the
// remediation-wave list, in that order.
func buildTriageMarkdown(findings []tidyFinding, failedFiles map[string]struct{}, totalBlocks int) string {
	var (
		perCheck        = make(map[string]int)
		complexityCount int
	)
	for i := range findings {
		perCheck[findings[i].Check]++
		if findings[i].ComplexityScore > 0 {
			complexityCount++
		}
	}
	summaries := summarizeByFile(findings, failedFiles)

	doc := new(strings.Builder)
	writeTriageSummary(doc, summaries, findings, perCheck, complexityCount, totalBlocks)
	writeTriageMatrix(doc, summaries)
	writeTriageWaves(doc)
	return doc.String()
}
// writeTriageSummary appends the document title and the "Summary"
// section to b: the headline metrics table, a fixed note about the
// check mix, the findings-per-check table, and the failed-files-per-
// subsystem table. Both tables are ordered by sortedKeys (highest
// count first, then name).
func writeTriageSummary(
	b *strings.Builder,
	summaries []fileSummary,
	findings []tidyFinding,
	checkCounts map[string]int,
	complexityCount, totalBlocks int,
) {
	// Tally failed files per subsystem up front; the table itself is
	// emitted last.
	perSubsystem := make(map[string]int)
	for i := range summaries {
		perSubsystem[subsystemFor(summaries[i].File)]++
	}

	b.WriteString("# clang-tidy triage (first-party C++)\n\n")
	b.WriteString("Generated by `task lint:cpp:tidy-report` from `lint-cpp-tidy.log`.\n\n")
	b.WriteString("## Summary\n\n")
	b.WriteString("| Metric | Value |\n|---|---|\n")
	_, _ = fmt.Fprintf(b, "| Files scanned | %d |\n", totalBlocks)
	_, _ = fmt.Fprintf(b, "| Files failed | %d |\n", len(summaries))
	_, _ = fmt.Fprintf(b, "| Total findings | %d |\n", len(findings))
	_, _ = fmt.Fprintf(b, "| Cognitive-complexity findings | %d |\n\n", complexityCount)

	b.WriteString("**Check mix:** Failures are **not** complexity-only. ")
	b.WriteString(
		"Cognitive-complexity is the largest bucket, but analyzer and cppcoreguidelines checks also fail the lane.\n\n",
	)

	b.WriteString("### Findings by check\n\n")
	b.WriteString("| Check | Count |\n|---|---|\n")
	for _, name := range sortedKeys(checkCounts) {
		_, _ = fmt.Fprintf(b, "| `%s` | %d |\n", name, checkCounts[name])
	}

	b.WriteString("\n### Failed files by subsystem\n\n")
	b.WriteString("| Subsystem | Files |\n|---|---|\n")
	for _, name := range sortedKeys(perSubsystem) {
		_, _ = fmt.Fprintf(b, "| %s | %d |\n", name, perSubsystem[name])
	}
}
// writeTriageMatrix appends the "Disposition matrix" section to b:
// one table row per file summary with its finding count, check list,
// worst complexity, subsystem, kind, disposition, wave, and a notes
// column. Notes are the file's unique symbols joined by ", " and cut
// to 80 bytes (77 plus "...") when longer.
func writeTriageMatrix(b *strings.Builder, summaries []fileSummary) {
	const (
		maxNotes = 80
		ellipsis = "..."
	)

	b.WriteString("\n## Disposition matrix\n\n")
	b.WriteString("| File | Findings | Checks | Worst complexity | Subsystem | Kind | Disposition | Wave | Notes |\n")
	b.WriteString("|---|---:|---|---:|---|---|---|---:|---|\n")

	for i := range summaries {
		row := summaries[i]
		checkList := sortedCheckList(row.Checks)
		notes := strings.Join(uniqueSymbols(row.Findings), ", ")
		disp, wave := dispositionFor(row)
		if len(notes) > maxNotes {
			notes = notes[:maxNotes-len(ellipsis)] + ellipsis
		}
		_, _ = fmt.Fprintf(b, "| `%s` | %d | %s | %d | %s | %s | %s | %d | %s |\n",
			row.File, len(row.Findings), checkList, row.WorstComplexity,
			subsystemFor(row.File), fileKind(row.File), disp, wave, notes)
	}
}
// writeTriageWaves appends the fixed "Remediation waves" section to
// b: a heading followed by the five numbered wave descriptions
// (waves 0 through 4).
func writeTriageWaves(b *strings.Builder) {
	for _, chunk := range [...]string{
		"\n## Remediation waves\n\n",
		"1. **Wave 0** — Tooling + `clang-tidy --fix` on rows dispositioned `autofix`.\n",
		"2. **Wave 1** — P0 complexity (>50): dedicated PRs with tests.\n",
		"3. **Wave 2** — P1 complexity (35–50): one PR per subsystem.\n",
		"4. **Wave 3** — Non-complexity correctness (`bugprone-*`, `clang-analyzer-*`).\n",
		"5. **Wave 4** — P2 complexity (26–34), tests/fixtures, scoped NOLINT.\n",
	} {
		b.WriteString(chunk)
	}
}
// sortedKeys returns the keys of m ordered by descending count, with
// ties broken by ascending key. The result is never nil, and the
// order is deterministic despite Go's random map iteration.
func sortedKeys(m map[string]int) []string {
	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	sort.Slice(keys, func(a, b int) bool {
		left, right := keys[a], keys[b]
		if lc, rc := m[left], m[right]; lc != rc {
			return lc > rc
		}
		return left < right
	})
	return keys
}
// sortedCheckList renders m as `name (count)` entries joined by "; ",
// in sortedKeys order. An empty map renders as the empty string.
func sortedCheckList(m map[string]int) string {
	var list strings.Builder
	for i, check := range sortedKeys(m) {
		if i > 0 {
			list.WriteString("; ")
		}
		_, _ = fmt.Fprintf(&list, "%s (%d)", check, m[check])
	}
	return list.String()
}
// uniqueSymbols returns one label per finding, in first-seen order
// and without repeats. The label is the finding's Symbol, or, when
// that is empty, its Message shortened with truncateMsg to 40 bytes.
func uniqueSymbols(findings []tidyFinding) []string {
	var (
		labels []string
		taken  = make(map[string]struct{})
	)
	for i := range findings {
		label := findings[i].Symbol
		if label == "" {
			label = truncateMsg(findings[i].Message, 40)
		}
		if _, dup := taken[label]; dup {
			continue
		}
		taken[label] = struct{}{}
		labels = append(labels, label)
	}
	return labels
}
// truncateMsg shortens s to at most n bytes, ending the shortened
// form with "..." to mark the cut. Strings that already fit are
// returned unchanged.
//
// The cut is moved back to the start of a UTF-8 sequence so a
// multi-byte character is never split in half. When n leaves no room
// for any text (n <= 3), the result is the first n bytes of "...",
// and the empty string for n <= 0, instead of panicking on a
// negative slice bound.
func truncateMsg(s string, n int) string {
	if len(s) <= n {
		return s
	}
	const ellipsis = "..."
	if n <= len(ellipsis) {
		if n <= 0 {
			return ""
		}
		return ellipsis[:n]
	}

	// Ranging over a string yields the byte offset of each character
	// start; keep the last one that still leaves room for the ellipsis.
	limit := n - len(ellipsis)
	cut := 0
	for i := range s {
		if i > limit {
			break
		}
		cut = i
	}
	return s[:cut] + ellipsis
}
// printTidySummary writes a short plain-text report to w: the number
// of file blocks, failed files and findings, the number of
// cognitive-complexity findings, a per-check count in sortedKeys
// order, and the paths of the CSV and triage outputs. Write errors
// are ignored.
func printTidySummary(
	w io.Writer,
	findings []tidyFinding,
	failedFiles map[string]struct{},
	totalBlocks int,
	csvPath, mdPath string,
) {
	var (
		perCheck        = make(map[string]int)
		complexityCount int
	)
	for i := range findings {
		perCheck[findings[i].Check]++
		if findings[i].ComplexityScore > 0 {
			complexityCount++
		}
	}

	_, _ = fmt.Fprintf(w, "lint:cpp:tidy-report: scanned %d file blocks, %d failed files, %d findings\n",
		totalBlocks, len(failedFiles), len(findings))
	_, _ = fmt.Fprintf(w, " cognitive-complexity findings: %d\n", complexityCount)
	_, _ = fmt.Fprintf(w, " checks:\n")
	for _, name := range sortedKeys(perCheck) {
		_, _ = fmt.Fprintf(w, " %s: %d\n", name, perCheck[name])
	}
	_, _ = fmt.Fprintf(w, " csv: %s\n", csvPath)
	_, _ = fmt.Fprintf(w, " triage: %s\n", mdPath)
}
package main
import (
"encoding/json"
"fmt"
"io"
"os"
"strings"
)
// Values used when rendering the release badge.
const (
// missingMessage is the badge message shown when no version is given.
missingMessage = "n/a"
// Badge colors chosen by badgeColor: blue for a release, orange for a
// pre-release, lightgrey when the version is missing.
colorBlue = "blue"
colorOrange = "orange"
colorLightgrey = "lightgrey"
)
// badgeJSON is the badge payload serialized by emitBadge. The JSON
// keys come from the struct tags; runBadge always sets SchemaVersion
// to 1.
type badgeJSON struct {
SchemaVersion int `json:"schemaVersion"`
Label string `json:"label"`
Message string `json:"message"`
Color string `json:"color"`
}
// badgeMessage returns the text to show on the badge: the version
// with surrounding whitespace removed, or missingMessage when nothing
// is left after trimming.
func badgeMessage(version string) string {
	if trimmed := strings.TrimSpace(version); trimmed != "" {
		return trimmed
	}
	return missingMessage
}
// badgeColor picks the badge color for a version: colorLightgrey when
// the version is blank, colorOrange when isPreRelease says it is a
// pre-release, and colorBlue otherwise. Surrounding whitespace is
// ignored.
func badgeColor(version string) string {
	switch trimmed := strings.TrimSpace(version); {
	case trimmed == "":
		return colorLightgrey
	case isPreRelease(trimmed):
		return colorOrange
	default:
		return colorBlue
	}
}
// isPreRelease reports whether tag is a semver pre-release, i.e. has
// a `-<identifiers>` part after the core version (`v1.2.0-rc.1`).
//
// Build metadata (everything from the first `+`) is stripped before
// looking for the hyphen, because it may legitimately contain one:
// `1.0.0+build-7` is a normal release, not a pre-release. An optional
// leading `v` is ignored.
func isPreRelease(tag string) bool {
	tag = strings.TrimPrefix(tag, "v")
	if plus := strings.IndexByte(tag, '+'); plus >= 0 {
		tag = tag[:plus]
	}
	return strings.Contains(tag, "-")
}
// runBadge implements the `badge` subcommand: it builds the badge
// payload for the given version and emits it as JSON.
//
// Flags:
//
//	-version  release tag to render (empty renders n/a)
//	-out      output path (default: stdout)
//	-label    badge label (default "release")
//
// A flag parse error is returned unchanged.
func runBadge(args []string, stdout, stderr io.Writer) error {
	fs := flagSet("badge", stderr)
	var (
		version = fs.String("version", "", "release tag to render (empty renders n/a)")
		out     = fs.String("out", "", "output badge JSON path (default: stdout)")
		label   = fs.String("label", "release", "badge label")
	)
	if err := fs.Parse(args); err != nil {
		return err
	}

	return emitBadge(&badgeJSON{
		SchemaVersion: 1,
		Label:         *label,
		Message:       badgeMessage(*version),
		Color:         badgeColor(*version),
	}, *out, stdout)
}
// emitBadge serializes b as indented JSON followed by a newline and
// writes it to outPath (mode 0o644), or to stdout when outPath is
// empty. Marshal and file-write failures are wrapped; a stdout write
// error is returned unchanged.
func emitBadge(b *badgeJSON, outPath string, stdout io.Writer) error {
	encoded, err := json.MarshalIndent(b, "", " ")
	if err != nil {
		return fmt.Errorf("marshal badge: %w", err)
	}
	encoded = append(encoded, '\n')

	if outPath != "" {
		//nolint:gosec // 0o644 is the right mode for a CI-published JSON artifact.
		if writeErr := os.WriteFile(outPath, encoded, 0o644); writeErr != nil {
			return fmt.Errorf("write %q: %w", outPath, writeErr)
		}
		return nil
	}
	_, err = stdout.Write(encoded)
	return err
}
// Command release emits shields.io endpoint JSON for the README
// release badge. The release workflow publishes badge-release.json
// so the badge does not depend on shields.io's shared GitHub API
// token pool.
package main
import (
"errors"
"flag"
"fmt"
"io"
"os"
)
// Subcommand names dispatched by run.
const (
cmdBadge = "badge"
cmdReadmeBadge = "readme-badge"
)
// errUsage signals a command-line usage mistake; main exits with
// status 2 when it sees this error, without printing it.
var errUsage = errors.New("usage error")
// run dispatches to the subcommand named by args[0] and returns its
// error.
//
// With no arguments or an unknown subcommand it prints the usage text
// to stderr and returns errUsage. `-h`, `--help` and `help` print the
// usage text to stdout and succeed.
//
// A subcommand invoked with `-h` makes its flag set print the flag
// help and return flag.ErrHelp. That is a successful help request,
// not a failure, so it is mapped to nil here; otherwise main would
// report "flag: help requested" and exit non-zero.
func run(args []string, stdout, stderr io.Writer) error {
	if len(args) < 1 {
		usage(stderr)
		return errUsage
	}

	var err error
	switch args[0] {
	case cmdBadge:
		err = runBadge(args[1:], stdout, stderr)
	case cmdReadmeBadge:
		err = runReadmeBadge(args[1:], stdout, stderr)
	case "-h", "--help", "help":
		usage(stdout)
		return nil
	default:
		_, _ = fmt.Fprintf(stderr, "release: unknown subcommand %q\n\n", args[0])
		usage(stderr)
		return errUsage
	}

	if errors.Is(err, flag.ErrHelp) {
		return nil
	}
	return err
}
// usage writes the top-level help text (tool summary and the list of
// subcommands) to w. The write error is ignored.
func usage(w io.Writer) {
_, _ = fmt.Fprint(w, `release - gh-pages badge JSON for the README release shield.
Subcommands:
badge Emit shields.io endpoint JSON for the latest semver release tag.
readme-badge Patch README.md shields.io cache buster (&v=...) for the release badge.
Run "release badge -h" or "release readme-badge -h" for flags.
`)
}
// flagSet creates the flag set for the subcommand called name. It
// uses flag.ContinueOnError, so a parse failure is returned to the
// caller instead of exiting the process, and sends flag diagnostics
// to stderr.
func flagSet(name string, stderr io.Writer) *flag.FlagSet {
	set := flag.NewFlagSet(name, flag.ContinueOnError)
	set.SetOutput(stderr)
	return set
}
// main runs the tool on the process arguments. Exit status is 0 on
// success, 2 for usage errors (which run has already reported), and
// 1 for any other error, which is printed to stderr first.
func main() {
	err := run(os.Args[1:], os.Stdout, os.Stderr)
	switch {
	case err == nil:
		return
	case errors.Is(err, errUsage):
		os.Exit(2)
	default:
		_, _ = fmt.Fprintf(os.Stderr, "release: %v\n", err)
		os.Exit(1)
	}
}
package main
import (
"errors"
"fmt"
"io"
"os"
"regexp"
"strings"
)
// defaultReadmePath is the default value of the `-readme` flag of the
// readme-badge subcommand.
const defaultReadmePath = "README.md"
// badgeReleaseCacheBusterRe matches the shields.io endpoint URL cache-buster
// query param on the README release badge line. Group 1 captures the
// literal `badge-release.json&v=`; the rest of the match is the current
// value, which runs up to (not including) the next `)`.
var badgeReleaseCacheBusterRe = regexp.MustCompile(
`(badge-release\.json&v=)[^)]+`,
)
// readmeBadgeCacheVersion derives the cache-buster value from a
// release tag: surrounding whitespace and one leading `v` are
// removed. A blank tag is rejected with an error.
func readmeBadgeCacheVersion(version string) (string, error) {
	trimmed := strings.TrimSpace(version)
	if len(trimmed) == 0 {
		return "", errors.New("version is required")
	}
	return strings.TrimPrefix(trimmed, "v"), nil
}
// patchReadmeBadgeCacheBuster rewrites every release-badge
// cache-buster value in readme to cacheVersion. It returns the
// patched bytes and whether they differ from the input, or an error
// when the README has no cache-buster to patch.
//
// NOTE(review): cacheVersion is spliced into a regexp replacement
// template, so a `$` in it would be read as a group reference.
func patchReadmeBadgeCacheBuster(readme []byte, cacheVersion string) ([]byte, bool, error) {
	if found := badgeReleaseCacheBusterRe.Match(readme); !found {
		return nil, false, errors.New("README release badge cache buster not found")
	}
	// ${1} re-emits the captured `badge-release.json&v=` prefix.
	patched := badgeReleaseCacheBusterRe.ReplaceAll(readme, []byte("${1}"+cacheVersion))
	changed := !bytesEqual(readme, patched)
	return patched, changed, nil
}
// bytesEqual reports whether a and b have the same length and the
// same bytes. A nil slice and an empty slice compare equal.
func bytesEqual(a, b []byte) bool {
	// Comparing the string conversions checks length and content in
	// one step.
	return string(a) == string(b)
}
// runReadmeBadge implements the `readme-badge` subcommand: it patches
// the release-badge cache-buster value in the README to match the
// given version and reports on stdout what it did.
//
// Flags:
//
//	-version  release tag to render (required)
//	-readme   README path to patch
//
// The file is rewritten (mode 0o644) only when the patch changes its
// contents. Flag parse errors, a missing version, read/write failures
// and a README without a cache-buster are returned as errors.
func runReadmeBadge(args []string, stdout, stderr io.Writer) error {
	fs := flagSet("readme-badge", stderr)
	var (
		version    = fs.String("version", "", "release tag to render (e.g. v0.3.0)")
		readmePath = fs.String("readme", defaultReadmePath, "README path to patch")
	)
	if err := fs.Parse(args); err != nil {
		return err
	}

	cacheVersion, err := readmeBadgeCacheVersion(*version)
	if err != nil {
		return err
	}
	original, err := os.ReadFile(*readmePath)
	if err != nil {
		return fmt.Errorf("read %q: %w", *readmePath, err)
	}
	patched, changed, err := patchReadmeBadgeCacheBuster(original, cacheVersion)
	if err != nil {
		return err
	}

	if changed {
		//nolint:gosec // 0o644 is the right mode for a tracked README.
		if writeErr := os.WriteFile(*readmePath, patched, 0o644); writeErr != nil {
			return fmt.Errorf("write %q: %w", *readmePath, writeErr)
		}
		_, _ = fmt.Fprintf(stdout, "readme-badge: patched %s to &v=%s\n", *readmePath, cacheVersion)
		return nil
	}
	_, _ = fmt.Fprintf(stdout, "readme-badge: %s already at &v=%s\n", *readmePath, cacheVersion)
	return nil
}