bench: Go Coverage Report

package main

import (
        "context"
        "errors"
        "flag"
        "fmt"
        "os"
        "path/filepath"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/bench/runner"
)

func main() {
        var (
                targetFlag   = flag.String("target", "emulator", "emulator, goccy, bigquery, or all")
                casesDir     = flag.String("cases", defaultCasesDir(), "directory of benchmark YAML cases")
                baselinePath = flag.String("baseline", defaultBaselinePath(), "path to bigquery.json baseline")
                capture      = flag.Bool("capture", false, "capture baseline (requires --target=bigquery)")
                compare      = flag.Bool("compare", false, "compare emulator results to committed baseline")
                jsonOut      = flag.String("json-out", "", "write machine-readable results JSON")
                project      = flag.String("project", os.Getenv("BENCH_BQ_PROJECT"), "BigQuery project for capture")
                goccyImage   = flag.String("goccy-image", "", "docker image for goccy emulator")
                caseFilter   = flag.String("case", "", "run a single case by name")
                engineBin    = flag.String("engine-binary", "", "path to emulator_main")
                skipGoccy    = flag.Bool(
                        "skip-goccy",
                        os.Getenv("BENCH_SKIP_GOCCY") == "1",
                        "skip goccy target when --target=all",
                )
                quiet = flag.Bool("quiet", false, "suppress per-case progress logging on stderr")
        )
        flag.Parse()
        if err := run(context.Background(), config{
                target:       *targetFlag,
                casesDir:     *casesDir,
                baselinePath: *baselinePath,
                capture:      *capture,
                compare:      *compare,
                jsonOut:      *jsonOut,
                project:      *project,
                goccyImage:   *goccyImage,
                caseFilter:   *caseFilter,
                engineBin:    *engineBin,
                skipGoccy:    *skipGoccy,
                quiet:        *quiet,
        }); err != nil {
                fmt.Fprintf(os.Stderr, "bench: %v\n", err)
                os.Exit(1)
        }
}

// config carries the parsed command-line options for one bench invocation.
// Each field mirrors the flag of the corresponding name registered in main.
type config struct {
        target       string // --target: emulator, goccy, bigquery, or all
        casesDir     string // --cases: directory of benchmark YAML cases
        baselinePath string // --baseline: path to the bigquery.json baseline
        capture      bool   // --capture: write a baseline from this run's results
        compare      bool   // --compare: gate emulator results against the baseline
        jsonOut      string // --json-out: optional path for the JSON report ("" disables it)
        project      string // --project (default $BENCH_BQ_PROJECT): BigQuery project
        goccyImage   string // --goccy-image: docker image override for the goccy target
        caseFilter   string // --case: run a single case by name ("" runs all)
        engineBin    string // --engine-binary: path to emulator_main
        skipGoccy    bool   // --skip-goccy (default $BENCH_SKIP_GOCCY == "1")
        quiet        bool   // --quiet: suppress progress logging on stderr
}

// run performs one benchmark invocation: it resolves the requested targets,
// loads the baseline when comparing, executes the cases through runner.Run,
// writes the text/JSON/baseline outputs, and finally applies the compare gate.
// The first error encountered is returned unchanged.
func run(ctx context.Context, cfg config) error {
        targets, err := resolveTargets(cfg.target, runner.TargetOptions{
                EngineBinary: cfg.engineBin,
                GoccyImage:   cfg.goccyImage,
                BQProject:    cfg.project,
        }, cfg.skipGoccy)
        if err != nil {
                return err
        }

        baseline := loadBaseline(cfg)

        // With --quiet the runner receives a nil progress callback; otherwise
        // each message is timestamped and written to stderr.
        var progress func(format string, args ...any)
        if !cfg.quiet {
                progress = func(format string, args ...any) {
                        _, _ = fmt.Fprintf(os.Stderr, "%s bench: %s\n",
                                time.Now().Format("15:04:05"), fmt.Sprintf(format, args...))
                }
        }

        runOpts := runner.RunOptions{
                CasesDir:   cfg.casesDir,
                CaseFilter: cfg.caseFilter,
                Targets:    targets,
                Baseline:   baseline,
                Compare:    cfg.compare,
                Progress:   progress,
        }
        report, err := runner.Run(ctx, runOpts)
        if err != nil {
                return err
        }

        if outErr := writeRunOutputs(cfg, targets, report, baseline); outErr != nil {
                return outErr
        }
        return enforceCompareGate(cfg, report)
}

// loadBaseline returns the baseline that emulator results are compared to.
//
// It returns nil when the run is a capture or when --compare is not set, since
// no comparison happens in either mode. When comparing, a baseline file that
// cannot be loaded is deliberately replaced by an empty baseline (no cases)
// rather than treated as a fatal error.
func loadBaseline(cfg config) *runner.BaselineFile {
        if cfg.capture || !cfg.compare {
                return nil
        }
        loaded, err := runner.LoadBaseline(cfg.baselinePath)
        if err != nil {
                // Best effort: compare against an empty baseline instead of failing.
                return &runner.BaselineFile{Cases: map[string]runner.BaselineCase{}}
        }
        return &loaded
}

// writeRunOutputs emits everything a finished run produces: the text report
// on stdout, the optional JSON report, and, in capture mode, the updated
// baseline file.
//
// The goccy image recorded on the report is the explicit --goccy-image when
// given; otherwise the default image tag is recorded whenever the goccy target
// is among the targets that ran. Membership in targets is the only signal
// used: --skip-goccy already removes goccy from the "all" target list, and it
// does not apply to an explicit --target=goccy run, which must still be
// recorded.
//
// In capture mode a project is required; the freshly built baseline is merged
// into any existing, loadable baseline so partial captures keep other cases.
func writeRunOutputs(
        cfg config,
        targets []runner.Target,
        report runner.RunReport,
        baseline *runner.BaselineFile,
) error {
        switch {
        case cfg.goccyImage != "":
                report.GoccyImage = cfg.goccyImage
        case containsTarget(targets, runner.TargetGoccy):
                report.GoccyImage = runner.ImageTag(runner.DefaultGoccyImage())
        }
        runner.PrintTextReport(os.Stdout, report, baseline)
        if err := saveJSONReport(cfg, report); err != nil {
                return err
        }
        if !cfg.capture {
                return nil
        }
        if cfg.project == "" {
                return errors.New("--project or BENCH_BQ_PROJECT required for capture")
        }
        b := runner.BuildBaselineFromResults(cfg.project, report.Results)
        // Merge into any existing baseline so a partial capture (e.g.
        // --case create_view_100k) updates only the cases that ran instead of
        // discarding the rest of the file. A baseline that cannot be loaded is
        // simply replaced by the fresh capture.
        if existing, loadErr := runner.LoadBaseline(cfg.baselinePath); loadErr == nil {
                b = runner.MergeBaseline(existing, b)
        }
        if err := runner.SaveBaseline(cfg.baselinePath, b); err != nil {
                return err
        }
        _, _ = fmt.Fprintf(os.Stdout, "wrote baseline %s (%d cases)\n", cfg.baselinePath, len(b.Cases))
        return nil
}

// saveJSONReport writes the machine-readable report to cfg.jsonOut, doing
// nothing when no output path was requested. When a single case was selected
// with --case, the new results are merged into the report already stored at
// that path (if it can be loaded) so other cases stay intact, and a summary
// line is printed to stdout.
func saveJSONReport(cfg config, report runner.RunReport) error {
        path := cfg.jsonOut
        if path == "" {
                return nil
        }
        partial := cfg.caseFilter != ""

        out := report
        if partial {
                existing, loadErr := runner.LoadReport(path)
                if loadErr == nil {
                        out = runner.MergeReport(existing, report)
                }
        }

        if err := runner.SaveReport(path, out); err != nil {
                return err
        }
        if !partial {
                return nil
        }
        _, _ = fmt.Fprintf(os.Stdout, "merged results into %s (%d rows)\n",
                path, len(out.Results))
        return nil
}

// enforceCompareGate turns failed emulator comparisons into an error. It is a
// no-op unless --compare is set. Only results for the emulator target whose
// Pass verdict is present and false are counted.
func enforceCompareGate(cfg config, report runner.RunReport) error {
        if !cfg.compare {
                return nil
        }
        var failed int
        for _, res := range report.Results {
                if res.Target != runner.TargetEmulator {
                        continue
                }
                if res.Pass == nil || *res.Pass {
                        continue
                }
                failed++
        }
        if failed == 0 {
                return nil
        }
        return fmt.Errorf("%d emulator case(s) failed compare gate", failed)
}

// resolveTargets maps a --target value (case-insensitive, surrounding
// whitespace ignored) to the list of targets to run. "emulator" and "compare"
// select the emulator alone, "bigquery"/"bq" select BigQuery, "goccy" selects
// goccy, and "all" selects the emulator plus goccy unless skipGoccy is set.
// Any other value is an error.
func resolveTargets(name string, opts runner.TargetOptions, skipGoccy bool) ([]runner.Target, error) {
        key := strings.ToLower(strings.TrimSpace(name))
        switch key {
        case "emulator", "compare":
                return []runner.Target{runner.NewEmulatorTarget(opts)}, nil
        case "goccy":
                return []runner.Target{runner.NewGoccyTarget(opts)}, nil
        case "bigquery", "bq":
                return []runner.Target{runner.NewBigQueryTarget(opts)}, nil
        case "all":
                selected := []runner.Target{runner.NewEmulatorTarget(opts)}
                if !skipGoccy {
                        selected = append(selected, runner.NewGoccyTarget(opts))
                }
                return selected, nil
        }
        return nil, fmt.Errorf("unknown target %q", key)
}

// containsTarget reports whether any target in targets has the given name.
func containsTarget(targets []runner.Target, name runner.TargetName) bool {
        for i := 0; i < len(targets); i++ {
                if targets[i].Name() != name {
                        continue
                }
                return true
        }
        return false
}

// defaultCasesDir returns <repo root>/bench/cases, falling back to the
// relative path "bench/cases" when the repository root cannot be located.
func defaultCasesDir() string {
        if root, err := repoRoot(); err == nil {
                return filepath.Join(root, "bench", "cases")
        }
        return "bench/cases"
}

// defaultBaselinePath returns <repo root>/bench/baselines/bigquery.json,
// falling back to the relative path "bench/baselines/bigquery.json" when the
// repository root cannot be located.
func defaultBaselinePath() string {
        if root, err := repoRoot(); err == nil {
                return filepath.Join(root, "bench", "baselines", "bigquery.json")
        }
        return "bench/baselines/bigquery.json"
}

// repoRoot walks upward from the current working directory and returns the
// first directory that contains a go.mod file. It returns an error if the
// working directory cannot be determined or if the filesystem root is reached
// without finding go.mod.
func repoRoot() (string, error) {
        start, err := os.Getwd()
        if err != nil {
                return "", err
        }
        cur := start
        for {
                _, statErr := os.Stat(filepath.Join(cur, "go.mod"))
                if statErr == nil {
                        return cur, nil
                }
                up := filepath.Dir(cur)
                if up == cur {
                        // filepath.Dir is a fixed point only at the root.
                        return "", fmt.Errorf("go.mod not found from %s", start)
                }
                cur = up
        }
}

package runner

import (
        "encoding/json"
        "fmt"
        "maps"
        "os"
        "time"
)

// BaselineFile is the committed golden BigQuery capture. It is serialized as
// JSON by SaveBaseline and read back by LoadBaseline.
type BaselineFile struct {
        // CapturedAt is when the capture was built; SaveBaseline normalizes it to UTC.
        CapturedAt  time.Time               `json:"captured_at"`
        // Project is the project name passed to BuildBaselineFromResults.
        Project     string                  `json:"project"`
        // ProjectHash is optional and omitted from JSON when empty.
        ProjectHash string                  `json:"project_hash,omitempty"`
        // Cases maps a case name to its golden values.
        Cases       map[string]BaselineCase `json:"cases"`
}

// BaselineCase holds golden latency and correctness for one case. Latency
// fields are whole milliseconds copied from a BigQuery CaseResult by
// BuildBaselineFromResults.
type BaselineCase struct {
        // ContentHash is the case's content hash at capture time; a mismatch
        // with the current case marks the baseline as stale in CompareToBaseline.
        ContentHash    string `json:"content_hash"`
        // TotalP50MS is the result's overall latency P50 in milliseconds.
        TotalP50MS     int64  `json:"total_p50_ms"`
        // ExecutionP50MS is the result's execution P50 in milliseconds.
        ExecutionP50MS int64  `json:"execution_p50_ms"`
        // QueueP50MS is the result's queue P50 in milliseconds.
        QueueP50MS     int64  `json:"queue_p50_ms,omitempty"`
        // TotalSlotMsP50 is copied from the result's TotalSlotMsP50.
        TotalSlotMsP50 int64  `json:"total_slot_ms_p50,omitempty"`
        // BytesProcessed is copied from the result's BytesProcessed.
        BytesProcessed int64  `json:"bytes_processed,omitempty"`
        // ResultHash is the hash of the rows the query returned.
        ResultHash     string `json:"result_hash"`
        // RowCount is the number of rows the query returned.
        RowCount       int    `json:"row_count"`
}

// LoadBaseline reads and decodes the JSON baseline file at path. The returned
// file always has a non-nil Cases map, even when the JSON omits it. Read and
// decode errors are returned as-is together with a zero BaselineFile.
func LoadBaseline(path string) (BaselineFile, error) {
        data, err := os.ReadFile(path) //nolint:gosec // baseline path is CLI-controlled
        if err != nil {
                return BaselineFile{}, err
        }
        var file BaselineFile
        err = json.Unmarshal(data, &file)
        if err != nil {
                return BaselineFile{}, err
        }
        if file.Cases == nil {
                file.Cases = make(map[string]BaselineCase)
        }
        return file, nil
}

// SaveBaseline serializes b as two-space-indented JSON followed by a trailing
// newline and writes it to path with mode 0o644. CapturedAt is converted to
// UTC on the local copy before encoding.
//
//nolint:gosec // baseline path and 0o644 mode are CLI-controlled benchmark artifacts.
func SaveBaseline(path string, b BaselineFile) error {
        b.CapturedAt = b.CapturedAt.UTC()
        encoded, err := json.MarshalIndent(b, "", "  ")
        if err != nil {
                return err
        }
        encoded = append(encoded, '\n')
        return os.WriteFile(path, encoded, 0o644)
}

// BuildBaselineFromResults constructs a baseline from a benchmark run. Only
// results for the BigQuery target with an OK outcome contribute; each one is
// stored under its case name with durations converted to whole milliseconds.
// The capture time is the current time in UTC.
func BuildBaselineFromResults(project string, results []CaseResult) BaselineFile {
        capturedAt := time.Now().UTC()
        cases := make(map[string]BaselineCase)
        for _, res := range results {
                if res.Target == TargetBigQuery && res.Outcome == OutcomeOK {
                        cases[res.CaseName] = BaselineCase{
                                ContentHash:    res.ContentHash,
                                TotalP50MS:     res.Latency.P50.Milliseconds(),
                                ExecutionP50MS: res.ExecutionP50.Milliseconds(),
                                QueueP50MS:     res.QueueP50.Milliseconds(),
                                TotalSlotMsP50: res.TotalSlotMsP50,
                                BytesProcessed: res.BytesProcessed,
                                ResultHash:     res.ResultHash,
                                RowCount:       res.RowCount,
                        }
                }
        }
        return BaselineFile{
                CapturedAt: capturedAt,
                Project:    project,
                Cases:      cases,
        }
}

// MergeBaseline overlays fresh capture results onto an existing baseline,
// preserving cases that were not part of this run. This makes partial
// captures (e.g. --case create_view_100k) update or add only the cases that
// ran, instead of discarding every other case in the file. The captured-at
// timestamp is always taken from the fresh capture; project and project hash
// are taken from it when non-empty.
//
// Neither argument is modified: the result gets its own Cases map, which is
// always non-nil. (Assigning the struct alone would share existing.Cases and
// write the fresh cases into the caller's map.)
func MergeBaseline(existing, fresh BaselineFile) BaselineFile {
        out := existing
        out.Cases = make(map[string]BaselineCase, len(existing.Cases)+len(fresh.Cases))
        maps.Copy(out.Cases, existing.Cases)
        // Fresh entries are copied last so they win on key collisions.
        maps.Copy(out.Cases, fresh.Cases)

        out.CapturedAt = fresh.CapturedAt
        if fresh.Project != "" {
                out.Project = fresh.Project
        }
        if fresh.ProjectHash != "" {
                out.ProjectHash = fresh.ProjectHash
        }
        return out
}

// CompareToBaseline checks an emulator result against the golden baseline for
// the same case and returns whether it passes plus a reason when it does not.
//
// Checks run in order: a baseline whose recorded content hash differs from the
// case's is stale; a wrong-result or any other non-OK outcome fails; finally
// the result's comparison latency must not exceed the threshold, which is the
// larger of the case's MaxMS and (baseline ratio latency × MaxRatio) when the
// baseline latency is positive.
func CompareToBaseline(c Case, base BaselineCase, r CaseResult) (pass bool, reason string) {
        if want := base.ContentHash; want != "" && want != c.ContentHash {
                return false, fmt.Sprintf("stale baseline (case changed): want hash %s got %s", want, c.ContentHash)
        }
        switch r.Outcome {
        case OutcomeOK:
                // Fall through to the latency check below.
        case OutcomeWrongResult:
                return false, "wrong result vs baseline hash"
        default:
                return false, string(r.Outcome) + ": " + r.Error
        }

        observed := r.CompareLatencyP50()
        limit := time.Duration(c.MaxMS) * time.Millisecond
        if baseMS := base.LatencyP50ForRatio(); baseMS > 0 {
                baseLatency := time.Duration(baseMS) * time.Millisecond
                scaled := time.Duration(float64(baseLatency) * c.MaxRatio)
                limit = max(limit, scaled)
        }
        if observed <= limit {
                return true, ""
        }
        return false, fmt.Sprintf("p50 %s > threshold %s (bq execution p50 %dms, ratio %.2f)",
                observed, limit, base.LatencyP50MS(), c.MaxRatio)
}

package runner

import (
        "context"
        "errors"
        "fmt"
        "math/rand"
        "net/http"
        "strconv"
        "strings"
        "time"

        "cloud.google.com/go/bigquery"
        "google.golang.org/api/googleapi"
        "google.golang.org/api/iterator"
)

// Exponential-backoff knobs for BigQuery rate-limit/quota retries. These
// are vars (not consts) so tests can shrink the waits. DDL-heavy cases
// (CREATE OR REPLACE TABLE/VIEW on the same object) trip BigQuery's
// per-table metadata-update quota, which only clears over several seconds;
// backoff spreads retries until the window reopens.
//
// bqAttemptTimeout caps a single submission attempt. The BigQuery client
// retries jobRateLimitExceeded *internally* on the call's context until that
// context is done (see runWithRetryExplicit in the client), so without a
// per-attempt cap the client's own retry consumes the entire per-query
// deadline and our backoff loop never runs (the "0 retries" symptom). Capping
// each attempt hands control back to this loop while still leaving the client's
// short internal backoff intact within the slice. It must exceed the slowest
// legitimate query/setup in the suite (a few seconds) by a wide margin.
var (
        bqBaseBackoff    = 1 * time.Second  // initial upper bound of the jittered backoff wait
        bqMaxBackoff     = 32 * time.Second // cap on the doubled backoff upper bound
        bqMaxRetries     = 8                // retries allowed after the first attempt
        bqAttemptTimeout = 30 * time.Second // per-attempt context timeout in retryOnRateLimit
)

// isNotFound reports whether err wraps a googleapi.Error carrying HTTP status
// 404, which is how BigQuery signals an absent dataset.
func isNotFound(err error) bool {
        var gerr *googleapi.Error
        if !errors.As(err, &gerr) {
                return false
        }
        return gerr.Code == http.StatusNotFound
}

// BigQueryTarget runs cases against real BigQuery via ADC.
type BigQueryTarget struct {
        opts     TargetOptions    // options supplied to NewBigQueryTarget
        client   *bigquery.Client // created by Start, closed by Cleanup; nil before Start
        project  string           // billing project, copied from opts.BQProject in Start
        datasets []string         // dataset IDs created by SetupCase, deleted by Cleanup
        location string           // opts.BQLocation, or "US" when that is empty
}

// NewBigQueryTarget returns a BigQuery target configured with opts. No client
// is created until Start is called.
func NewBigQueryTarget(opts TargetOptions) *BigQueryTarget {
        target := new(BigQueryTarget)
        target.opts = opts
        return target
}

// Name identifies this target as TargetBigQuery.
func (t *BigQueryTarget) Name() TargetName {
        return TargetBigQuery
}

// Start validates the configuration and creates the BigQuery client. It fails
// when no project is configured or when the client cannot be created. The
// location defaults to "US" when opts.BQLocation is empty.
func (t *BigQueryTarget) Start(ctx context.Context) error {
        project := t.opts.BQProject
        if project == "" {
                return errors.New("BENCH_BQ_PROJECT or --project is required for bigquery target")
        }
        location := t.opts.BQLocation
        if location == "" {
                location = "US"
        }
        t.project, t.location = project, location

        client, err := bigquery.NewClient(ctx, project)
        if err != nil {
                return fmt.Errorf("bigquery.NewClient: %w", err)
        }
        t.client = client
        return nil
}

// ProjectID returns the billing project for BigQuery runs; it is empty until
// Start has succeeded in reading the configured project.
func (t *BigQueryTarget) ProjectID() string {
        return t.project
}

// SetupCase prepares a fresh dataset for case c and runs its setup SQL.
//
// The dataset argument may be qualified with the project ("project.dataset");
// the prefix is stripped to get the dataset ID. Any leftover dataset with that
// ID is deleted first, the dataset is recreated in the target's location with
// a 24h default table expiration, recorded for Cleanup, and each substituted
// setup statement is executed in order. The first failure is returned.
func (t *BigQueryTarget) SetupCase(ctx context.Context, c Case, dataset string) error {
        dsID := strings.TrimPrefix(dataset, t.project+".")
        handle := t.client.Dataset(dsID)

        // A previous (interrupted) run may have left the dataset behind;
        // start from a clean slate. NotFound is the normal case and is ignored.
        delErr := handle.DeleteWithContents(ctx)
        if delErr != nil && !isNotFound(delErr) {
                return fmt.Errorf("delete stale dataset %s: %w", dsID, delErr)
        }

        createErr := handle.Create(ctx, &bigquery.DatasetMetadata{
                Location:               t.location,
                DefaultTableExpiration: 24 * time.Hour,
        })
        if createErr != nil {
                return fmt.Errorf("create dataset %s: %w", dsID, createErr)
        }
        t.datasets = append(t.datasets, dsID)

        // Only the setup statements are needed here; the query is discarded.
        statements, _ := c.Substitute(dataset, t.project)
        for i := range statements {
                if err := t.runSQL(ctx, statements[i]); err != nil {
                        return err
                }
        }
        return nil
}

// RunQuery executes sql once through timedQuery and returns the rows, their
// hash, and the job's execution/queue/slot metrics. A non-positive timeout
// falls back to defaultTimeoutMS.
//
// Failures before metrics are available (submission, missing statistics, or a
// query-cache hit) yield a result carrying only the error text. Failures while
// reading rows additionally carry the timing metrics already extracted.
func (t *BigQueryTarget) RunQuery(ctx context.Context, c Case, sql string, timeout time.Duration) (QueryResult, error) {
        if timeout <= 0 {
                timeout = time.Duration(defaultTimeoutMS) * time.Millisecond
        }
        attempt := func(ctx context.Context) (QueryResult, error) {
                // bare builds the result for errors that occur before any
                // job metrics are known.
                bare := func(err error) (QueryResult, error) {
                        return QueryResult{Error: err.Error()}, err
                }

                job, err := t.runJob(ctx, sql)
                if err != nil {
                        return bare(err)
                }
                metrics, err := extractBQJobMetrics(job.LastStatus())
                if err != nil {
                        return bare(err)
                }
                if metrics.cacheHit {
                        return bare(errors.New("bigquery query cache hit (DisableQueryCache ineffective)"))
                }

                // From here on every outcome reports the job's timing metrics.
                res := QueryResult{
                        ExecutionOnly:  metrics.execution,
                        ExecutionValid: true,
                        QueueOnly:      metrics.queue,
                        SlotMs:         metrics.slotMs,
                }
                it, err := job.Read(ctx)
                if err != nil {
                        res.Error = err.Error()
                        return res, err
                }
                rows, err := readAllRows(it)
                if err != nil {
                        res.Error = err.Error()
                        return res, err
                }

                converted := bqRowsToMaps(rows)
                hash, _ := HashRows(converted)
                res.BytesProcessed = metrics.bytesProcessed
                res.Rows = converted
                res.RowCount = len(converted)
                res.ResultHash = hash
                return res, nil
        }
        return timedQuery(ctx, attempt, timeout)
}

// Cleanup deletes every dataset recorded by SetupCase and closes the client.
// It is a no-op when Start never created a client.
//
// Deletion is attempted for all datasets even if some fail, so one failure
// does not leave the remaining datasets behind in the real BigQuery project.
// A NotFound response is treated as success: the dataset is already gone,
// which also happens when the same dataset ID was recorded more than once.
// All remaining delete errors and any client Close error are combined with
// errors.Join; the result is nil when everything succeeded.
func (t *BigQueryTarget) Cleanup(ctx context.Context) error {
        if t.client == nil {
                return nil
        }
        var errs []error
        for _, ds := range t.datasets {
                err := t.client.Dataset(ds).DeleteWithContents(ctx)
                if err != nil && !isNotFound(err) {
                        errs = append(errs, fmt.Errorf("delete dataset %s: %w", ds, err))
                }
        }
        if err := t.client.Close(); err != nil {
                errs = append(errs, err)
        }
        return errors.Join(errs...)
}

// runSQL executes one statement for its side effects and reports any
// submission, wait, or job-level error. runJob has already waited for the job
// once; the job is waited on again here and its final status is checked.
func (t *BigQueryTarget) runSQL(ctx context.Context, sql string) error {
        job, err := t.runJob(ctx, sql)
        if err != nil {
                return err
        }
        status, waitErr := job.Wait(ctx)
        if waitErr != nil {
                return waitErr
        }
        return status.Err()
}

// runJob submits sql and waits for it to finish, retrying rate-limit style
// failures through retryOnRateLimit. Each attempt is a call to runJobOnce with
// the per-attempt context supplied by the retry loop.
func (t *BigQueryTarget) runJob(ctx context.Context, sql string) (*bigquery.Job, error) {
        once := func(attemptCtx context.Context) (*bigquery.Job, error) {
                return t.runJobOnce(attemptCtx, sql)
        }
        return retryOnRateLimit(ctx, once)
}

// runJobOnce submits sql as a query job in the target's location with the
// query cache disabled, waits for completion, and returns the finished job.
// A submission error, a wait error, or a job-level error is returned with a
// nil job.
func (t *BigQueryTarget) runJobOnce(ctx context.Context, sql string) (*bigquery.Job, error) {
        query := t.client.Query(sql)
        query.Location = t.location
        // Benchmarks must never read cached results; cache hits yield ~0ms execution.
        query.DisableQueryCache = true

        job, err := query.Run(ctx)
        if err != nil {
                return nil, err
        }
        status, waitErr := job.Wait(ctx)
        if waitErr != nil {
                return nil, waitErr
        }
        if jobErr := status.Err(); jobErr != nil {
                return nil, jobErr
        }
        return job, nil
}

// isRateLimitErr reports whether err is a BigQuery throttling/quota or
// transient backend error worth retrying with backoff.
//
// Three signals are checked in turn: the HTTP status of a wrapped
// googleapi.Error (429, 500, 502, 503), the structured reason of any of its
// error items, and finally well-known reason fragments in the lower-cased
// error message. The last check covers errors such as an HTTP 400 whose only
// hint is the text "Job exceeded rate limits: ... jobRateLimitExceeded", seen
// on repeated CREATE OR REPLACE statements.
func isRateLimitErr(err error) bool {
        if err == nil {
                return false
        }

        var gerr *googleapi.Error
        if errors.As(err, &gerr) {
                if gerr.Code == http.StatusTooManyRequests || // 429
                        gerr.Code == http.StatusInternalServerError || // 500
                        gerr.Code == http.StatusBadGateway || // 502
                        gerr.Code == http.StatusServiceUnavailable { // 503
                        return true
                }
                for i := range gerr.Errors {
                        reason := gerr.Errors[i].Reason
                        if reason == "rateLimitExceeded" ||
                                reason == "jobRateLimitExceeded" ||
                                reason == "quotaExceeded" ||
                                reason == "backendError" ||
                                reason == "internalError" {
                                return true
                        }
                }
        }

        lowered := strings.ToLower(err.Error())
        fragments := [...]string{
                "ratelimitexceeded",
                "jobratelimitexceeded",
                "exceeded rate limits",
                "exceeded quota",
                "quotaexceeded",
                "backenderror",
        }
        for i := range fragments {
                if strings.Contains(lowered, fragments[i]) {
                        return true
                }
        }
        return false
}

// retryOnRateLimit runs fn, retrying rate-limit/quota/backend errors with
// exponential backoff and full jitter until success, a non-retryable error,
// the retry budget is exhausted, or ctx expires.
//
// Each attempt runs against a sub-context capped at bqAttemptTimeout so the
// BigQuery client's internal retryer cannot consume the whole parent deadline
// before this loop gets to back off (context.WithTimeout also caps at the
// parent's own deadline, so we never exceed the per-query budget).
//
// On success the job returned by fn is passed through. On failure the job is
// nil and the error is either fn's non-retryable error unchanged, or a
// "rate limit: ..." error wrapping the last retryable error (and the context
// error when the parent context ended).
func retryOnRateLimit(
        ctx context.Context,
        fn func(context.Context) (*bigquery.Job, error),
) (*bigquery.Job, error) {
        // backoff is the current upper bound of the jittered wait; it doubles
        // after every wait until it reaches bqMaxBackoff.
        backoff := bqBaseBackoff
        for attempt := 0; ; attempt++ {
                attemptCtx, cancel := context.WithTimeout(ctx, bqAttemptTimeout)
                job, err := fn(attemptCtx)
                // Release the attempt's timer immediately rather than deferring,
                // since this runs inside a loop.
                cancel()
                if err == nil {
                        return job, nil
                }
                // A capped attempt that timed out on the rate-limit reason is still a
                // rate-limit error worth backing off on; isRateLimitErr matches the
                // reason text the client leaves in the wrapped deadline error.
                if !isRateLimitErr(err) {
                        return nil, err
                }
                // attempt counts from zero, so bqMaxRetries retries means
                // bqMaxRetries+1 attempts in total.
                if attempt >= bqMaxRetries {
                        return nil, fmt.Errorf("rate limit: exhausted %d attempts: %w", attempt+1, err)
                }
                // Parent deadline/cancellation reached: no budget left to back off.
                if cerr := ctx.Err(); cerr != nil {
                        return nil, fmt.Errorf("rate limit: parent context done after %d attempts: %w (last error: %w)",
                                attempt+1, cerr, err)
                }
                // Full jitter: wait in [0, backoff] to avoid synchronized retries.
                wait := time.Duration(rand.Int63n(int64(backoff) + 1)) //nolint:gosec // jitter, not crypto
                timer := time.NewTimer(wait)
                select {
                case <-ctx.Done():
                        timer.Stop()
                        return nil, fmt.Errorf("rate limit: backoff aborted after %d attempts: %w (last error: %w)",
                                attempt+1, ctx.Err(), err)
                case <-timer.C:
                }
                if backoff < bqMaxBackoff {
                        backoff *= 2
                        if backoff > bqMaxBackoff {
                                backoff = bqMaxBackoff
                        }
                }
        }
}

// bqJobMetrics is the subset of BigQuery job statistics the benchmark
// records, as filled in by extractBQJobMetrics.
type bqJobMetrics struct {
        execution      time.Duration // job EndTime minus StartTime
        queue          time.Duration // StartTime minus CreationTime; zero when not available
        slotMs         int64         // slot milliseconds from query statistics or TotalSlotDuration
        cacheHit       bool          // whether the query statistics report a cache hit
        bytesProcessed int64         // TotalBytesProcessed from the job statistics
}

// extractBQJobMetrics derives benchmark metrics from a finished job's status.
// It returns an error when the status or its statistics are missing, or when
// the start or end time is unset. Queue time is reported only when a creation
// time exists and precedes the start time. Slot milliseconds come from the
// query statistics, falling back to TotalSlotDuration when that value is zero.
func extractBQJobMetrics(status *bigquery.JobStatus) (bqJobMetrics, error) {
        if status == nil || status.Statistics == nil {
                return bqJobMetrics{}, errors.New("missing BigQuery job statistics")
        }
        stats := status.Statistics
        if stats.StartTime.IsZero() || stats.EndTime.IsZero() {
                return bqJobMetrics{}, errors.New("missing BigQuery startTime or endTime")
        }

        var out bqJobMetrics
        out.execution = stats.EndTime.Sub(stats.StartTime)
        out.bytesProcessed = stats.TotalBytesProcessed
        if created := stats.CreationTime; !created.IsZero() && stats.StartTime.After(created) {
                out.queue = stats.StartTime.Sub(created)
        }
        if details, isQuery := stats.Details.(*bigquery.QueryStatistics); isQuery {
                out.cacheHit = details.CacheHit
                out.slotMs = details.SlotMillis
        }
        if out.slotMs == 0 && stats.TotalSlotDuration > 0 {
                out.slotMs = stats.TotalSlotDuration.Milliseconds()
        }
        return out, nil
}

// readAllRows drains it and returns every row as a column-name-to-value map.
// Iteration stops cleanly at iterator.Done; any other error discards the rows
// read so far and is returned. An empty result yields a nil slice.
func readAllRows(it *bigquery.RowIterator) ([]map[string]bigquery.Value, error) {
        var rows []map[string]bigquery.Value
        for {
                var next map[string]bigquery.Value
                err := it.Next(&next)
                switch {
                case errors.Is(err, iterator.Done):
                        return rows, nil
                case err != nil:
                        return nil, err
                }
                rows = append(rows, next)
        }
}

// bqRowsToMaps converts each row's values to strings with bqValueToString,
// preserving row order and column names. The result is always non-nil.
func bqRowsToMaps(rows []map[string]bigquery.Value) []map[string]string {
        converted := make([]map[string]string, len(rows))
        for i, row := range rows {
                cells := make(map[string]string, len(row))
                for col, val := range row {
                        cells[col] = bqValueToString(val)
                }
                converted[i] = cells
        }
        return converted
}

// bqValueToString renders a BigQuery cell value as a string: nil becomes "",
// strings pass through, int64 is base-10, float64 uses the shortest 'f'
// representation, bool is "true"/"false", and anything else is formatted with
// fmt.Sprint.
func bqValueToString(v bigquery.Value) string {
        switch val := v.(type) {
        case nil:
                return ""
        case string:
                return val
        case bool:
                return strconv.FormatBool(val)
        case int64:
                return strconv.FormatInt(val, 10)
        case float64:
                return strconv.FormatFloat(val, 'f', -1, 64)
        }
        return fmt.Sprint(v)
}

// Compile-time assertion that *BigQueryTarget implements Target.
var _ Target = (*BigQueryTarget)(nil)

package runner

import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
        "os"
        "path/filepath"
        "slices"
        "sort"
        "strings"
        "time"

        "gopkg.in/yaml.v3"
)

// Case is one YAML benchmark definition under bench/cases/. Fields tagged
// `yaml:"-"` are not read from the YAML document by this struct's decoding.
type Case struct {
        // Name defaults to the file's base name without extension (see LoadCase).
        Name        string       `yaml:"name"`
        Tags        []string     `yaml:"tags,omitempty"`
        // SetupSQL holds the setup statements that Substitute expands.
        SetupSQL    []string     `yaml:"-"`
        // Query is required; LoadCase rejects a case without it.
        Query       string       `yaml:"query"`
        // Iterations defaults to defaultIterations when not positive.
        Iterations  int          `yaml:"iterations,omitempty"`
        // Warmup is clamped by LoadCase to the range [0, Iterations-1].
        Warmup      int          `yaml:"warmup,omitempty"`
        // MaxRatio defaults to defaultMaxRatio when not positive.
        MaxRatio    float64      `yaml:"max_ratio,omitempty"`
        // MaxMS defaults to defaultMaxMS when not positive.
        MaxMS       int64        `yaml:"max_ms,omitempty"`
        // ProjectID defaults to "bench-" + Name.
        ProjectID   string       `yaml:"project_id,omitempty"`
        // SkipTargets lists targets that must not run this case (see SkippedFor).
        SkipTargets []TargetName `yaml:"skip_targets,omitempty"`
        // SkipReason is the explanation SkippedFor reports for skipped targets.
        SkipReason  string       `yaml:"skip_reason,omitempty"`
        // Path is the file the case was loaded from.
        Path        string       `yaml:"-"`
        // ContentHash is the hash of the raw file content, set by LoadCase.
        ContentHash string       `yaml:"-"`
}

// Defaults applied when a case or caller leaves a setting unset.
const (
        defaultIterations = 10     // Case.Iterations when not positive
        defaultWarmup     = 2      // NOTE(review): not applied by LoadCase; presumably used elsewhere — confirm
        defaultMaxRatio   = 1.5    // Case.MaxRatio when not positive
        defaultMaxMS      = 30_000 // Case.MaxMS (milliseconds) when not positive
        defaultTimeoutMS  = 60_000 // per-query timeout (milliseconds) when none is given
)

// LoadCases loads every regular entry in dir whose name ends in ".yaml",
// in lexical order of their paths. Subdirectories are not descended into.
// The first case that fails to load aborts the whole operation.
func LoadCases(dir string) ([]Case, error) {
        entries, err := os.ReadDir(dir)
        if err != nil {
                return nil, fmt.Errorf("read cases dir %s: %w", dir, err)
        }

        var files []string
        for _, entry := range entries {
                if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".yaml") {
                        files = append(files, filepath.Join(dir, entry.Name()))
                }
        }
        sort.Strings(files)

        cases := make([]Case, len(files))
        for i, file := range files {
                loaded, loadErr := LoadCase(file)
                if loadErr != nil {
                        return nil, loadErr
                }
                cases[i] = loaded
        }
        return cases, nil
}

// LoadCase reads and parses one benchmark case file, then fills in defaults:
// the name falls back to the file's base name without extension, iterations,
// max ratio and max milliseconds fall back to their package defaults when not
// positive, warmup is clamped to [0, Iterations-1], and the project ID falls
// back to "bench-" + name. A case without a query is rejected. Path and
// ContentHash (a hash of the raw file bytes) are recorded on the result.
func LoadCase(path string) (Case, error) {
        data, err := os.ReadFile(path) //nolint:gosec // case path comes from bench/cases discovery
        if err != nil {
                return Case{}, fmt.Errorf("read %s: %w", path, err)
        }
        var loaded Case
        if parseErr := yaml.Unmarshal(data, &loaded); parseErr != nil {
                return Case{}, fmt.Errorf("parse %s: %w", path, parseErr)
        }
        if loaded.Query == "" {
                return Case{}, fmt.Errorf("%s: query is required", path)
        }

        // Identity defaults; ProjectID depends on the (possibly defaulted) name.
        if loaded.Name == "" {
                loaded.Name = strings.TrimSuffix(filepath.Base(path), filepath.Ext(path))
        }
        if loaded.ProjectID == "" {
                loaded.ProjectID = "bench-" + loaded.Name
        }

        // Iteration counts; Iterations is at least 1 once defaulted, so the
        // warmup clamp always leaves at least one measured iteration.
        if loaded.Iterations <= 0 {
                loaded.Iterations = defaultIterations
        }
        loaded.Warmup = min(max(loaded.Warmup, 0), loaded.Iterations-1)

        // Compare-gate thresholds.
        if loaded.MaxRatio <= 0 {
                loaded.MaxRatio = defaultMaxRatio
        }
        if loaded.MaxMS <= 0 {
                loaded.MaxMS = defaultMaxMS
        }

        loaded.Path = path
        loaded.ContentHash = hashContent(string(data))
        return loaded, nil
}

// SkippedFor reports whether target is listed in the case's skip_targets.
// When it is, the second result is the case's skip_reason, or a generic
// "skipped for <target>" message if no reason was given.
func (c Case) SkippedFor(target TargetName) (bool, string) {
        if !slices.Contains(c.SkipTargets, target) {
                return false, ""
        }
        if c.SkipReason != "" {
                return true, c.SkipReason
        }
        return true, "skipped for " + string(target)
}

// Substitute expands the {{ds}} and {{project}} placeholders in every setup
// statement and in the query. {{ds}} is replaced first, then {{project}}.
// The returned setup slice is never nil.
func (c Case) Substitute(dataset, project string) (setup []string, query string) {
        expand := func(sql string) string {
                withDataset := strings.ReplaceAll(sql, "{{ds}}", dataset)
                return strings.ReplaceAll(withDataset, "{{project}}", project)
        }

        setup = make([]string, 0, len(c.SetupSQL))
        for _, stmt := range c.SetupSQL {
                setup = append(setup, expand(stmt))
        }
        query = expand(c.Query)
        return setup, query
}

// UnmarshalYAML decodes a case whose `setup` key is a list of {sql: ...}
// objects. Scalar fields are copied over as decoded; setup entries with an
// empty sql string are dropped and the rest become SetupSQL in file order.
// Path and ContentHash are left untouched.
func (c *Case) UnmarshalYAML(value *yaml.Node) error {
        // rawCase mirrors the on-disk layout; it exists so Decode does not
        // recurse back into this method.
        type rawCase struct {
                Name        string       `yaml:"name"`
                Tags        []string     `yaml:"tags,omitempty"`
                Query       string       `yaml:"query"`
                Iterations  int          `yaml:"iterations,omitempty"`
                Warmup      int          `yaml:"warmup,omitempty"`
                MaxRatio    float64      `yaml:"max_ratio,omitempty"`
                MaxMS       int64        `yaml:"max_ms,omitempty"`
                ProjectID   string       `yaml:"project_id,omitempty"`
                SkipTargets []TargetName `yaml:"skip_targets,omitempty"`
                SkipReason  string       `yaml:"skip_reason,omitempty"`
                Setup       []struct {
                        SQL string `yaml:"sql"`
                } `yaml:"setup"`
        }

        var decoded rawCase
        if err := value.Decode(&decoded); err != nil {
                return err
        }

        statements := make([]string, 0, len(decoded.Setup))
        for _, entry := range decoded.Setup {
                if entry.SQL == "" {
                        continue
                }
                statements = append(statements, entry.SQL)
        }

        c.Name, c.Tags, c.Query = decoded.Name, decoded.Tags, decoded.Query
        c.Iterations, c.Warmup = decoded.Iterations, decoded.Warmup
        c.MaxRatio, c.MaxMS = decoded.MaxRatio, decoded.MaxMS
        c.ProjectID = decoded.ProjectID
        c.SkipTargets, c.SkipReason = decoded.SkipTargets, decoded.SkipReason
        c.SetupSQL = statements
        return nil
}

// hashContent returns the first 8 bytes of the SHA-256 digest of s as
// 16 lowercase hex characters.
func hashContent(s string) string {
        const prefixBytes = 8
        digest := sha256.Sum256([]byte(s))
        return hex.EncodeToString(digest[:prefixBytes])
}

// QueryTimeout picks the wall-clock cap for query iterations. A case whose
// max_ms exceeds the default cap gets max_ms as its timeout, so slow targets
// (notably goccy on large joins) can finish; every other case gets fallback.
func (c Case) QueryTimeout(fallback time.Duration) time.Duration {
        if c.MaxMS <= defaultMaxMS {
                return fallback
        }
        return time.Duration(c.MaxMS) * time.Millisecond
}

package runner

import (
        "context"
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "runtime"
        "time"

        conf "github.com/vantaboard/bigquery-emulator/conformance/runner"
)

// defaultEngineBinary is the engine path resolveEngineBinary probes first and
// returns as a last resort when no candidate exists on disk.
const defaultEngineBinary = "./bin/emulator_main"

// EmulatorTarget drives the in-repo emulator via an in-process gateway.
// env and client are nil until Start succeeds.
type EmulatorTarget struct {
        opts   TargetOptions
        // env is the running emulator returned by conf.StartEmulator.
        env    *conf.EmulatorEnv
        // client posts queries to env.BaseURL.
        client *RESTClient
}

// NewEmulatorTarget returns an EmulatorTarget holding opts. Nothing is
// started until Start is called.
func NewEmulatorTarget(opts TargetOptions) *EmulatorTarget {
        target := new(EmulatorTarget)
        target.opts = opts
        return target
}

// Name reports TargetEmulator.
func (t *EmulatorTarget) Name() TargetName { return TargetEmulator }

// Start launches the emulator with the DuckDB conformance profile and wires a
// REST client to its base URL. The engine binary comes from the target
// options, or from resolveEngineBinary when the option is empty. Engine
// stdout and stderr are both forwarded to this process's stderr.
// Windows is rejected up front.
func (t *EmulatorTarget) Start(ctx context.Context) error {
        if runtime.GOOS == "windows" {
                return errors.New("emulator benchmarks require POSIX subprocess support")
        }

        engine := t.opts.EngineBinary
        if engine == "" {
                engine = resolveEngineBinary()
        }

        profile, found := conf.LookupProfile(conf.ProfileDuckDB)
        if !found {
                return fmt.Errorf("profile %q not found", conf.ProfileDuckDB)
        }

        harness := conf.HarnessOptions{
                EngineBinary: engine,
                EngineStdout: os.Stderr,
                EngineStderr: os.Stderr,
        }
        started, err := conf.StartEmulator(ctx, harness, profile)
        if err != nil {
                return err
        }

        t.env = started
        t.client = NewRESTClient(started.BaseURL, "bench")
        return nil
}

// SetupCase runs the case's setup statements, in order, through the gateway
// under the case's own project. Placeholders are expanded with dataset and
// the case project id. The first failing statement stops setup and its error
// is returned unchanged.
func (t *EmulatorTarget) SetupCase(ctx context.Context, c Case, dataset string) error {
        statements, _ := c.Substitute(dataset, c.ProjectID)
        projectURL := fmt.Sprintf("%s/bigquery/v2/projects/%s", t.env.BaseURL, c.ProjectID)
        for i := range statements {
                err := conf.SetupSQLViaGateway(ctx, projectURL, statements[i])
                if err != nil {
                        return err
                }
        }
        return nil
}

// RunQuery posts sql to the emulator under the case's project and returns the
// decoded rows, their hash, and (when the response carries them) the emulator
// route and phase statistics. A non-positive timeout falls back to
// defaultTimeoutMS. Transport errors, non-2xx statuses and undecodable bodies
// are returned as errors, with the message mirrored into QueryResult.Error.
func (t *EmulatorTarget) RunQuery(ctx context.Context, c Case, sql string, timeout time.Duration) (QueryResult, error) {
        // The shared client is re-pointed at this case's project on every call.
        t.client.ProjectID = c.ProjectID
        if timeout <= 0 {
                timeout = time.Duration(defaultTimeoutMS) * time.Millisecond
        }
        return timedQuery(ctx, func(ctx context.Context) (QueryResult, error) {
                // Note: timedQuery overwrites Elapsed with its own measurement of
                // the whole closure once it returns, so the value recorded here
                // (HTTP round trip only) does not reach the caller.
                start := time.Now()
                status, body, err := t.client.PostQuery(ctx, sql)
                if err != nil {
                        return QueryResult{Error: err.Error()}, err
                }
                elapsed := time.Since(start)
                if status < 200 || status >= 300 {
                        return QueryResult{Elapsed: elapsed, Error: fmt.Sprintf("HTTP %d: %s", status, snippet(body))},
                                fmt.Errorf("query failed: HTTP %d", status)
                }
                resp, err := ParseQueryResponse(body)
                if err != nil {
                        return QueryResult{Elapsed: elapsed, Error: err.Error()}, err
                }
                rows := RESTRowsToMaps(resp.Schema, resp.Rows)
                // A hashing error is ignored; ResultHash is then left empty.
                hash, _ := HashRows(rows)
                out := QueryResult{
                        Elapsed:    elapsed,
                        Rows:       rows,
                        RowCount:   len(rows),
                        ResultHash: hash,
                }
                if resp.Statistics != nil && resp.Statistics.Query != nil {
                        out.Route = resp.Statistics.Query.EmulatorRoute
                        out.Phases = resp.Statistics.Query.EmulatorPhases
                }
                return out, nil
        }, timeout)
}

// Cleanup closes the emulator environment if one was started and returns the
// result of that close. It is a no-op when Start never succeeded.
func (t *EmulatorTarget) Cleanup(context.Context) error {
        if t.env == nil {
                return nil
        }
        return t.env.Close()
}

// snippet returns b as a string, shortened to at most 200 bytes followed by
// "..." when b is longer. The cut is moved back (by up to 3 bytes) so it never
// lands in the middle of a multi-byte UTF-8 sequence, keeping error messages
// built from response bodies valid UTF-8.
func snippet(b []byte) string {
        const limit = 200
        if len(b) <= limit {
                return string(b)
        }
        cut := limit
        // b[cut] is the first dropped byte. If it is a UTF-8 continuation byte
        // (10xxxxxx) the cut splits a rune, so back up to that rune's lead byte.
        // A rune has at most 3 continuation bytes, which bounds the walk even
        // for malformed input.
        for back := 0; back < 3 && cut > 0 && b[cut]&0xC0 == 0x80; back++ {
                cut--
        }
        return string(b[:cut]) + "..."
}

// resolveEngineBinary picks the engine binary path. An existing path in
// BIGQUERY_EMULATOR_BIN wins; otherwise the first existing bench default is
// used; if nothing exists, defaultEngineBinary is returned anyway.
func resolveEngineBinary() string {
        exists := func(path string) bool {
                _, err := os.Stat(path) //nolint:gosec // paths are operator-supplied or bench-owned defaults
                return err == nil
        }

        if fromEnv := os.Getenv("BIGQUERY_EMULATOR_BIN"); fromEnv != "" && exists(fromEnv) {
                return fromEnv
        }
        for _, candidate := range [...]string{defaultEngineBinary, filepath.Join("bin", "emulator_main")} {
                if exists(candidate) {
                        return candidate
                }
        }
        return defaultEngineBinary
}

// Compile-time check that *EmulatorTarget satisfies Target.
var _ Target = (*EmulatorTarget)(nil)

package runner

import (
        "bufio"
        "context"
        "fmt"
        "io"
        "net/http"
        "os"
        "os/exec"
        "strings"
        "time"
)

// defaultGoccyImage is the image GoccyTarget.Start runs when the target
// options do not name one.
const defaultGoccyImage = "ghcr.io/goccy/bigquery-emulator:0.8.1"

// goccyProject is the single project the goccy container is started
// with. goccy/bigquery-emulator 404s on any other project id, so every
// case runs under this one (dataset names are unique per case).
const goccyProject = "bench"

// DefaultGoccyImage returns the pinned goccy container reference
// (the value of defaultGoccyImage).
func DefaultGoccyImage() string { return defaultGoccyImage }

// GoccyTarget drives the goccy/bigquery-emulator Docker image.
type GoccyTarget struct {
        opts       TargetOptions
        // container is the docker container name; empty when none is running.
        container  string
        // hostPort is the loopback port published to the container's 9050.
        hostPort   int
        // client is set only after the container passed its readiness probe.
        client     *RESTClient
        httpClient *http.Client
        // logsCancel stops the `docker logs -f` follower goroutine.
        logsCancel context.CancelFunc
}

// NewGoccyTarget returns a GoccyTarget holding opts. No container is started
// until Start is called.
func NewGoccyTarget(opts TargetOptions) *GoccyTarget {
        target := new(GoccyTarget)
        target.opts = opts
        return target
}

// Name reports TargetGoccy.
func (t *GoccyTarget) Name() TargetName { return TargetGoccy }

// Start runs the goccy emulator image in a detached, auto-removed docker
// container published on a free loopback port, starts forwarding its logs,
// and waits until it answers queries. The image comes from the target
// options, or defaultGoccyImage when unset. If the container never becomes
// ready it is removed again and the readiness error is returned.
//
// The container is started with --project=goccyProject, and the REST client
// is bound to that same constant so the two cannot drift apart.
func (t *GoccyTarget) Start(ctx context.Context) error {
        image := t.opts.GoccyImage
        if image == "" {
                image = defaultGoccyImage
        }
        port, err := freePort()
        if err != nil {
                return fmt.Errorf("pick free port for goccy: %w", err)
        }
        t.hostPort = port
        // The timestamp keeps container names unique across runs and restarts.
        name := fmt.Sprintf("bq-bench-goccy-%d", time.Now().UnixNano())
        args := []string{
                "run", "--rm", "-d",
                "--name", name,
                "-p", fmt.Sprintf("127.0.0.1:%d:9050", port),
                image,
                "--project=" + goccyProject,
                "--log-level=debug",
        }
        cmd := exec.CommandContext(ctx, "docker", args...) //nolint:gosec // bench operator supplies the image ref
        if out, err := cmd.CombinedOutput(); err != nil {
                return fmt.Errorf("docker run %s: %w: %s", image, err, strings.TrimSpace(string(out)))
        }
        t.container = name
        t.startLogFollower()
        // No client-level timeout: every request carries its own context.
        t.httpClient = &http.Client{Timeout: 0}
        if err := t.waitReady(ctx); err != nil {
                _ = t.Cleanup(ctx) // best-effort teardown; the readiness error is what matters
                return err
        }
        t.client = &RESTClient{
                BaseURL:   fmt.Sprintf("http://127.0.0.1:%d", port),
                ProjectID: goccyProject,
                HTTP:      t.httpClient,
        }
        return nil
}

// waitReady polls the container with a `SELECT 1` query every 500ms until it
// answers with any status below 500, the context's deadline passes (60s from
// now when the context has none), or the context is cancelled.
//
// Cancellation is observed between probes, so a cancelled context ends the
// wait immediately instead of polling until the deadline.
func (t *GoccyTarget) waitReady(ctx context.Context) error {
        deadline, ok := ctx.Deadline()
        if !ok {
                deadline = time.Now().Add(60 * time.Second)
        }
        url := fmt.Sprintf("http://127.0.0.1:%d/bigquery/v2/projects/%s/queries", t.hostPort, goccyProject)
        const probe = `{"query":"SELECT 1","useLegacySql":false}`
        for time.Now().Before(deadline) {
                req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, strings.NewReader(probe))
                if err != nil {
                        return err
                }
                req.Header.Set("Content-Type", "application/json")
                resp, err := t.httpClient.Do(req)
                if err == nil {
                        _ = resp.Body.Close()
                        // Any non-5xx answer means the HTTP server is up, even if
                        // the probe query itself was rejected.
                        if resp.StatusCode >= 200 && resp.StatusCode < 500 {
                                return nil
                        }
                }
                select {
                case <-ctx.Done():
                        return fmt.Errorf("goccy emulator on port %d not ready: %w", t.hostPort, ctx.Err())
                case <-time.After(500 * time.Millisecond):
                }
        }
        return fmt.Errorf("goccy emulator on port %d not ready", t.hostPort)
}

// EnsureReady makes sure a responsive container is available. With no client
// yet it simply starts one; otherwise it pings the existing container with a
// 5-second budget and restarts it if the ping fails.
func (t *GoccyTarget) EnsureReady(ctx context.Context) error {
        if t.client == nil {
                return t.Start(ctx)
        }

        pingCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
        defer cancel()
        if pingErr := t.ping(pingCtx); pingErr != nil {
                return t.restart(ctx)
        }
        return nil
}

// ping runs `SELECT 1` against the container and reports a transport error or
// any non-2xx status as a failure.
func (t *GoccyTarget) ping(ctx context.Context) error {
        code, payload, err := t.client.PostQuery(ctx, "SELECT 1")
        switch {
        case err != nil:
                return err
        case code < 200 || code >= 300:
                return fmt.Errorf("ping HTTP %d: %s", code, snippet(payload))
        default:
                return nil
        }
}

// restart tears down the current container (ignoring any cleanup error) and
// starts a fresh one, returning Start's result.
func (t *GoccyTarget) restart(ctx context.Context) error {
        _ = t.Cleanup(ctx)
        return t.Start(ctx)
}

// SetupCase creates the case's dataset under goccyProject and then runs each
// setup statement in order. Every statement gets its own timeout, taken from
// the case's QueryTimeout with defaultTimeoutMS as the fallback. Setup stops
// at the first transport error or non-2xx response.
func (t *GoccyTarget) SetupCase(ctx context.Context, c Case, dataset string) error {
        t.client.ProjectID = goccyProject
        statements, _ := c.Substitute(dataset, goccyProject)
        perStatement := c.QueryTimeout(time.Duration(defaultTimeoutMS) * time.Millisecond)

        if err := t.client.CreateDataset(ctx, dataset); err != nil {
                return err
        }
        for _, stmt := range statements {
                stmtCtx, cancel := context.WithTimeout(ctx, perStatement)
                code, payload, err := t.client.PostQuery(stmtCtx, stmt)
                cancel()
                switch {
                case err != nil:
                        return err
                case code < 200 || code >= 300:
                        return fmt.Errorf("setup sql -> HTTP %d: %s", code, snippet(payload))
                }
        }
        return nil
}

// RunQuery executes sql against the goccy container under goccyProject and
// returns the decoded rows and their hash. A non-positive timeout falls back
// to defaultTimeoutMS.
//
// For CREATE OR REPLACE TABLE/VIEW ... AS statements, the DROP IF EXISTS
// preamble is issued before the timed section starts, so the reported Elapsed
// covers only the CREATE itself. The preamble gets its own timeout of the
// same length. If the preamble fails, its error is returned with a zero
// Elapsed.
func (t *GoccyTarget) RunQuery(ctx context.Context, c Case, sql string, timeout time.Duration) (QueryResult, error) {
        t.client.ProjectID = goccyProject
        if timeout <= 0 {
                timeout = time.Duration(defaultTimeoutMS) * time.Millisecond
        }

        // Untimed preamble: must stay outside timedQuery, which measures the
        // whole closure it is given.
        prepCtx, cancelPrep := context.WithTimeout(ctx, timeout)
        timedSQL, err := prepareGoccyDDLQuery(prepCtx, t.client, sql)
        cancelPrep()
        if err != nil {
                return QueryResult{Error: err.Error()}, err
        }

        return timedQuery(ctx, func(ctx context.Context) (QueryResult, error) {
                status, body, err := t.client.PostQuery(ctx, timedSQL)
                if err != nil {
                        return QueryResult{Error: err.Error()}, err
                }
                if status < 200 || status >= 300 {
                        return QueryResult{Error: fmt.Sprintf("HTTP %d: %s", status, snippet(body))},
                                fmt.Errorf("query failed: HTTP %d", status)
                }
                resp, err := ParseQueryResponse(body)
                if err != nil {
                        return QueryResult{Error: err.Error()}, err
                }
                rows := RESTRowsToMaps(resp.Schema, resp.Rows)
                // A hashing error is ignored; ResultHash is then left empty.
                hash, _ := HashRows(rows)
                return QueryResult{
                        Rows:       rows,
                        RowCount:   len(rows),
                        ResultHash: hash,
                }, nil
        }, timeout)
}

// startLogFollower spawns a goroutine that runs `docker logs -f` for the
// current container and copies its stdout to this process's stderr with a
// "[goccy] " prefix. It does nothing when no container is set. The follower
// runs under its own background-derived context; calling t.logsCancel stops
// the docker process. Failures to create the pipe or start docker silently
// end the goroutine.
func (t *GoccyTarget) startLogFollower() {
        if t.container == "" {
                return
        }
        // Detached from any request context so the follower outlives Start.
        logCtx, cancel := context.WithCancel(context.Background())
        t.logsCancel = cancel
        go func() {
                // #nosec G204 -- container name is bench-owned.
                cmd := exec.CommandContext(
                        logCtx,
                        "docker",
                        "logs",
                        "-f",
                        t.container,
                )
                // Only stdout is piped; cmd.Stderr is left unset.
                stdout, err := cmd.StdoutPipe()
                if err != nil {
                        return
                }
                if err := cmd.Start(); err != nil {
                        return
                }
                defer func() { _ = stdout.Close() }()
                // Blocks until the pipe reaches EOF or a read fails.
                streamPrefixedLines(stdout, "[goccy] ")
                _ = cmd.Wait()
        }()
}

// streamPrefixedLines copies r to stderr line by line, writing prefix before
// each line. A trailing "\n" (and a "\r" directly before it) is stripped from
// every line, and a final line without a newline is still emitted. It returns
// when r reaches EOF or a read fails.
//
// Lines of any length are forwarded. bufio.Scanner is deliberately not used:
// it gives up at its 64 KiB token limit, which would silently stop log
// forwarding after one oversized line.
func streamPrefixedLines(r io.Reader, prefix string) {
        br := bufio.NewReader(r)
        for {
                line, err := br.ReadString('\n')
                if line != "" {
                        line = strings.TrimSuffix(line, "\n")
                        line = strings.TrimSuffix(line, "\r")
                        _, _ = fmt.Fprintf(os.Stderr, "%s%s\n", prefix, line)
                }
                if err != nil {
                        return
                }
        }
}

// Cleanup stops the log follower and force-removes the container, if any.
// Removal is best-effort: a docker failure is ignored and nil is always
// returned.
//
// The removal runs under a context detached from ctx's cancellation and
// bounded to 30 seconds. Cleanup is typically called after ctx has already
// expired (for example when the readiness wait timed out); running docker
// under that context would abort the command before it starts and leak the
// container.
func (t *GoccyTarget) Cleanup(ctx context.Context) error {
        if t.logsCancel != nil {
                t.logsCancel()
                t.logsCancel = nil
        }
        if t.container == "" {
                return nil
        }
        rmCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), 30*time.Second)
        defer cancel()
        cmd := exec.CommandContext(rmCtx, "docker", "rm", "-f", t.container) //nolint:gosec // container name is bench-owned
        _ = cmd.Run()
        t.container = ""
        return nil
}

// ImageTag extracts the tag from a full docker image reference, i.e. the text
// after the last ":" in the final path component. A ":" that belongs to a
// registry host:port (such as "localhost:5000/img") is not treated as a tag
// separator. When the reference carries no tag, image is returned unchanged.
func ImageTag(image string) string {
        // Only the last path component can carry a tag; LastIndex returns -1
        // when there is no "/", which makes the slice start at 0.
        name := image[strings.LastIndex(image, "/")+1:]
        if i := strings.LastIndex(name, ":"); i >= 0 {
                return name[i+1:]
        }
        return image
}

// Compile-time check that *GoccyTarget satisfies Target.
var _ Target = (*GoccyTarget)(nil)

package runner

import (
        "context"
        "fmt"
        "regexp"
        "strings"
)

// createOrReplaceAsRE matches CREATE OR REPLACE TABLE/VIEW ... AS (CTAS / view body).
// Bench DDL cases use this shape; goccy 0.8.1 treats a second CREATE as duplicate
// rather than honoring OR REPLACE, so we DROP IF EXISTS then CREATE before timing.
// Group 1 is the object kind, group 2 the object name (a run of non-space
// characters). The pattern is case-insensitive and anchored at the start.
var createOrReplaceAsRE = regexp.MustCompile(
        `(?is)^CREATE\s+OR\s+REPLACE\s+(TABLE|VIEW)\s+(\S+)\s+AS\s+`,
)

// rewriteGoccyCreateOrReplace splits CREATE OR REPLACE TABLE/VIEW ... AS into an
// idempotent DROP + CREATE pair for goccy. ok is false when sql is not that
// shape; createSQL is then just the whitespace-trimmed input and dropSQL is
// empty. The kind keyword is upper-cased in both generated statements.
//
// The CREATE statement is assembled by concatenation rather than a regexp
// replacement template, so a "$" in the object name is kept literally instead
// of being expanded as a capture-group reference.
func rewriteGoccyCreateOrReplace(sql string) (dropSQL, createSQL string, ok bool) {
        trimmed := strings.TrimSpace(sql)
        m := createOrReplaceAsRE.FindStringSubmatch(trimmed)
        if m == nil {
                return "", trimmed, false
        }
        kind := strings.ToUpper(m[1])
        object := m[2]
        dropSQL = fmt.Sprintf("DROP %s IF EXISTS %s", kind, object)
        // The pattern is anchored at the start, so m[0] is exactly the prefix
        // being replaced and everything after it is the query body.
        createSQL = "CREATE " + kind + " " + object + " AS " + trimmed[len(m[0]):]
        return dropSQL, createSQL, true
}

// prepareGoccyDDLQuery readies sql for goccy. Statements that are not
// CREATE OR REPLACE TABLE/VIEW ... AS are returned whitespace-trimmed and
// nothing is sent. For that shape, the matching DROP IF EXISTS is posted
// through client first and the equivalent plain CREATE ... AS is returned for
// the caller to run. A transport error or non-2xx status from the DROP is
// returned as an error.
func prepareGoccyDDLQuery(ctx context.Context, client *RESTClient, sql string) (string, error) {
        dropStmt, createStmt, matched := rewriteGoccyCreateOrReplace(sql)
        if !matched {
                return strings.TrimSpace(sql), nil
        }

        code, payload, err := client.PostQuery(ctx, dropStmt)
        switch {
        case err != nil:
                return "", fmt.Errorf("goccy ddl preamble drop: %w", err)
        case code < 200 || code >= 300:
                return "", fmt.Errorf("goccy ddl preamble drop -> HTTP %d: %s", code, snippet(payload))
        }
        return createStmt, nil
}

package runner

import (
        "crypto/sha256"
        "encoding/hex"
        "encoding/json"
        "maps"
        "sort"
        "strconv"
)

// NormalizeRows returns a deterministic JSON encoding for hashing: a JSON
// array of the rows, each encoded as an object with sorted keys, with the
// rows themselves ordered by their encoded text. The input slice and its
// maps are not modified. A nil row encodes as {} and an empty or nil input
// encodes as [].
//
// Each row is marshaled exactly once and the encoded strings are sorted,
// instead of re-marshaling both operands on every sort comparison.
func NormalizeRows(rows []map[string]string) ([]byte, error) {
        encoded := make([]string, len(rows))
        size := 2 // the surrounding brackets
        for i, r := range rows {
                // Copying into a fresh map makes a nil row encode as {} rather
                // than null.
                cp := make(map[string]string, len(r))
                maps.Copy(cp, r)
                b, err := json.Marshal(cp)
                if err != nil {
                        return nil, err
                }
                encoded[i] = string(b)
                size += len(b) + 1 // element plus separator
        }
        sort.Strings(encoded)

        out := make([]byte, 0, size)
        out = append(out, '[')
        for i, e := range encoded {
                if i > 0 {
                        out = append(out, ',')
                }
                out = append(out, e...)
        }
        return append(out, ']'), nil
}

// HashRows returns the full SHA-256 digest, as lowercase hex, of the rows'
// normalized encoding. A normalization error is returned with an empty hash.
func HashRows(rows []map[string]string) (string, error) {
        normalized, err := NormalizeRows(rows)
        if err != nil {
                return "", err
        }
        hasher := sha256.New()
        hasher.Write(normalized)
        return hex.EncodeToString(hasher.Sum(nil)), nil
}

// RowsFromBQ converts rows with arbitrary cell values into the string-map
// form used for hashing, stringifying every cell with cellToString. The
// result has one map per input row, in the same order, and is never nil.
func RowsFromBQ(rows []map[string]any) []map[string]string {
        converted := make([]map[string]string, len(rows))
        for i, row := range rows {
                cells := make(map[string]string, len(row))
                for column, value := range row {
                        cells[column] = cellToString(value)
                }
                converted[i] = cells
        }
        return converted
}

// cellToString renders one decoded cell as text: nil becomes the empty
// string, strings pass through, float64 goes through jsonNumber, booleans
// become "true"/"false", and anything else is rendered as its JSON encoding
// (an encoding error yields the empty string).
func cellToString(v any) string {
        switch cell := v.(type) {
        case nil:
                return ""
        case string:
                return cell
        case bool:
                return strconv.FormatBool(cell)
        case float64:
                return jsonNumber(cell)
        }
        encoded, _ := json.Marshal(v)
        return string(encoded)
}

func jsonNumber(f float64) string {
        if f == float64(int64(f)) {
                return strconv.FormatInt(int64(f), 10)
        }
        return strconv.FormatFloat(f, 'f', -1, 64)
}

package runner

import "time"

// minRatioMS is the 1ms floor substituted for non-positive latencies by the
// *ForRatio helpers.
const minRatioMS int64 = 1

// LatencyP50MS returns the baseline's BigQuery latency in milliseconds: the
// server-side execution p50 when it is positive, otherwise the total client
// wall-clock p50 (kept for legacy baselines).
func (b BaselineCase) LatencyP50MS() int64 {
        if b.ExecutionP50MS <= 0 {
                return b.TotalP50MS
        }
        return b.ExecutionP50MS
}

// LatencyP50ForRatio returns LatencyP50MS raised to at least 1ms, so it is
// always safe to use as the BigQuery denominator in ratio math.
func (b BaselineCase) LatencyP50ForRatio() int64 {
        return max(b.LatencyP50MS(), minRatioMS)
}

// CompareLatencyP50 returns the emulator latency used for baseline
// comparison: the server-side total_engine p50 when it is positive,
// otherwise the HTTP wall-clock p50.
func (r CaseResult) CompareLatencyP50() time.Duration {
        if r.EngineP50 <= 0 {
                return r.Latency.P50
        }
        return r.EngineP50
}

// CompareLatencyMSForRatio returns CompareLatencyP50 in whole milliseconds,
// raised to at least 1ms.
func (r CaseResult) CompareLatencyMSForRatio() int64 {
        return max(r.CompareLatencyP50().Milliseconds(), minRatioMS)
}

// EngineP50FromPhases returns the p50 of the "total_engine" phase from the
// aggregated phase stats, or 0 when phases is nil or has no such phase.
func EngineP50FromPhases(phases PhaseStats) time.Duration {
        // Looking up a key in a nil map is safe and reports "absent".
        engine, found := phases["total_engine"]
        if !found {
                return 0
        }
        return engine.P50
}

package runner

import (
        "bytes"
        "context"
        "encoding/json"
        "fmt"
        "io"
        "net/http"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// RESTClient issues jobs.query calls against a BigQuery REST emulator.
type RESTClient struct {
        // BaseURL is the scheme://host[:port] prefix; request paths are
        // appended to it directly.
        BaseURL   string
        // ProjectID is interpolated into every request path; targets reassign
        // it between calls.
        ProjectID string
        // HTTP performs the requests and must be non-nil.
        HTTP      *http.Client
}

// NewRESTClient returns a client for baseURL and projectID backed by a fresh
// http.Client with no client-level timeout; request deadlines come from the
// context passed to each call.
func NewRESTClient(baseURL, projectID string) *RESTClient {
        client := &RESTClient{HTTP: &http.Client{}}
        client.BaseURL = baseURL
        client.ProjectID = projectID
        return client
}

// CreateDataset registers datasetID under the client's project via
// POST .../datasets. HTTP 409 (already exists) counts as success so repeated
// case setup is idempotent; any other non-2xx status is an error that
// includes a snippet of the response body. The body is always read in full
// before the status is examined.
func (c *RESTClient) CreateDataset(ctx context.Context, datasetID string) error {
        reference := map[string]string{
                "datasetId": datasetID,
                "projectId": c.ProjectID,
        }
        payload, err := json.Marshal(map[string]any{"datasetReference": reference})
        if err != nil {
                return err
        }

        endpoint := fmt.Sprintf("%s/bigquery/v2/projects/%s/datasets", c.BaseURL, c.ProjectID)
        req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
        if err != nil {
                return err
        }
        req.Header.Set("Content-Type", "application/json")

        resp, err := c.HTTP.Do(req)
        if err != nil {
                return err
        }
        defer func() { _ = resp.Body.Close() }()

        answer, err := io.ReadAll(resp.Body)
        if err != nil {
                return err
        }
        switch code := resp.StatusCode; {
        case code == http.StatusConflict:
                return nil
        case code < 200 || code >= 300:
                return fmt.Errorf("create dataset %s -> HTTP %d: %s", datasetID, code, snippet(answer))
        default:
                return nil
        }
}

// PostQuery sends sql as a standard-SQL jobs.query request under the client's
// project and returns the HTTP status code together with the raw response
// body. Interpreting the status is left to the caller. If reading the body
// fails, the status and whatever was read are returned alongside the error;
// earlier failures return a zero status and nil body.
func (c *RESTClient) PostQuery(ctx context.Context, sql string) (int, []byte, error) {
        request := map[string]any{
                "query":        sql,
                "useLegacySql": false,
        }
        payload, err := json.Marshal(request)
        if err != nil {
                return 0, nil, err
        }

        endpoint := fmt.Sprintf("%s/bigquery/v2/projects/%s/queries", c.BaseURL, c.ProjectID)
        req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
        if err != nil {
                return 0, nil, err
        }
        req.Header.Set("Content-Type", "application/json")

        resp, err := c.HTTP.Do(req)
        if err != nil {
                return 0, nil, err
        }
        defer func() { _ = resp.Body.Close() }()

        answer, readErr := io.ReadAll(resp.Body)
        return resp.StatusCode, answer, readErr
}

// ParseQueryResponse decodes a successful jobs.query body. On a decode error
// the (possibly partially filled) response is returned together with it.
func ParseQueryResponse(body []byte) (bqtypes.QueryResponse, error) {
        var parsed bqtypes.QueryResponse
        err := json.Unmarshal(body, &parsed)
        return parsed, err
}

// RESTRowsToMaps converts REST f/v rows into maps keyed by schema field name,
// stringifying each cell with cellToString. Cells are matched to fields by
// position; fields beyond the end of a short row are left out of that row's
// map. A nil schema yields nil; otherwise the result has one map per row.
func RESTRowsToMaps(schema *bqtypes.TableSchema, rows []bqtypes.Row) []map[string]string {
        if schema == nil {
                return nil
        }
        converted := make([]map[string]string, len(rows))
        for r, row := range rows {
                cells := make(map[string]string, len(schema.Fields))
                for col, field := range schema.Fields {
                        if col >= len(row.F) {
                                break // the row has no more cells
                        }
                        cells[field.Name] = cellToString(row.F[col].V)
                }
                converted[r] = cells
        }
        return converted
}

// timedQuery runs fn under a context limited to timeout and stamps the
// returned result's Elapsed with the wall-clock duration of the whole call,
// replacing any Elapsed value fn set itself. fn's error is passed through.
func timedQuery(
        ctx context.Context,
        fn func(context.Context) (QueryResult, error),
        timeout time.Duration,
) (QueryResult, error) {
        limited, cancel := context.WithTimeout(ctx, timeout)
        defer cancel()

        began := time.Now()
        result, err := fn(limited)
        result.Elapsed = time.Since(began)
        return result, err
}

package runner

import (
        "encoding/json"
        "fmt"
        "io"
        "os"
        "slices"
        "strings"
        "time"
)

// CaseResult is the aggregated outcome for one (case, target) pair.
// (CaseName, Target) is the identity MergeReport uses to replace rows.
type CaseResult struct {
        CaseName         string        `json:"case_name"`
        Target           TargetName    `json:"target"`
        ContentHash      string        `json:"content_hash,omitempty"`
        Outcome          Outcome       `json:"outcome"`
        Error            string        `json:"error,omitempty"`
        // Latency.P50 is the p50 column of the text report and the fallback
        // for CompareLatencyP50.
        Latency          LatencyStats  `json:"latency"`
        // EngineP50, when positive, is preferred by CompareLatencyP50.
        EngineP50        time.Duration `json:"engine_p50,omitempty"`
        ExecutionP50     time.Duration `json:"execution_p50,omitempty"`
        QueueP50         time.Duration `json:"queue_p50,omitempty"`
        TotalSlotMsP50   int64         `json:"total_slot_ms_p50,omitempty"`
        BytesProcessed   int64         `json:"bytes_processed,omitempty"`
        Route            string        `json:"route,omitempty"`
        Phases           PhaseStats    `json:"phases,omitempty"`
        ResultHash       string        `json:"result_hash,omitempty"`
        RowCount         int           `json:"row_count,omitempty"`
        // Pass is nil when the result has no pass/fail verdict; the text
        // report counts only non-nil true values as passed.
        Pass             *bool         `json:"pass,omitempty"`
        // CompareReason takes precedence over Error in the report's notes column.
        CompareReason    string        `json:"compare_reason,omitempty"`
        BQExecutionP50MS int64         `json:"bq_execution_p50_ms,omitempty"`
        Ratio            float64       `json:"ratio_vs_bq,omitempty"`
}

// RunReport is the machine-readable benchmark output.
type RunReport struct {
        // Timestamp is converted to UTC by SaveReport before writing.
        Timestamp  time.Time    `json:"timestamp"`
        CommitSHA  string       `json:"commit_sha,omitempty"`
        Host       string       `json:"host,omitempty"`
        GoccyImage string       `json:"goccy_image,omitempty"`
        Targets    []TargetName `json:"targets"`
        // Results holds one entry per (case, target) pair.
        Results    []CaseResult `json:"results"`
}

// SaveReport writes r to path as two-space indented JSON followed by a
// newline, with the timestamp converted to UTC. The file is written with
// mode 0644.
func SaveReport(path string, r RunReport) error {
        r.Timestamp = r.Timestamp.UTC()
        encoded, err := json.MarshalIndent(r, "", "  ")
        if err != nil {
                return err
        }
        encoded = append(encoded, '\n')
        return os.WriteFile(path, encoded, 0o644) //nolint:gosec // 0o644 is fine for benchmark output JSON
}

// MergeReport overlays a fresh run onto an existing report. Every existing
// row whose (case_name, target) also appears in fresh is replaced by the
// fresh row; all other existing rows are kept. This lets partial reruns
// (e.g. --case create_view_100k) update only the cases that ran instead of
// discarding the rest of bench/results.json.
//
// The timestamp always comes from fresh. Commit, host, goccy image and
// target list come from fresh when it sets them and from existing otherwise.
// Results are sorted by case name, then target.
func MergeReport(existing, fresh RunReport) RunReport {
        merged := existing
        merged.Timestamp = fresh.Timestamp
        if fresh.CommitSHA != "" {
                merged.CommitSHA = fresh.CommitSHA
        }
        if fresh.Host != "" {
                merged.Host = fresh.Host
        }
        if fresh.GoccyImage != "" {
                merged.GoccyImage = fresh.GoccyImage
        }
        if len(fresh.Targets) > 0 {
                merged.Targets = fresh.Targets
        }

        // Index the rerun rows so superseded existing rows can be dropped.
        rerun := make(map[string]struct{}, len(fresh.Results))
        for i := range fresh.Results {
                rerun[resultKey(fresh.Results[i])] = struct{}{}
        }

        results := make([]CaseResult, 0, len(existing.Results))
        for _, previous := range existing.Results {
                if _, superseded := rerun[resultKey(previous)]; !superseded {
                        results = append(results, previous)
                }
        }
        results = append(results, fresh.Results...)

        slices.SortFunc(results, func(a, b CaseResult) int {
                if byName := strings.Compare(a.CaseName, b.CaseName); byName != 0 {
                        return byName
                }
                return strings.Compare(string(a.Target), string(b.Target))
        })
        merged.Results = results
        return merged
}

// resultKey builds the (case name, target) identity of a result, joining the
// two with a NUL byte.
func resultKey(r CaseResult) string {
        return strings.Join([]string{r.CaseName, string(r.Target)}, "\x00")
}

// LoadReport reads and decodes the results JSON file at path. Read and decode
// errors are returned unwrapped together with a zero RunReport.
func LoadReport(path string) (RunReport, error) {
        data, err := os.ReadFile(path) //nolint:gosec // report path is CLI-controlled
        if err != nil {
                return RunReport{}, err
        }

        var report RunReport
        if err = json.Unmarshal(data, &report); err != nil {
                return RunReport{}, err
        }
        return report, nil
}

// PrintTextReport writes a human-readable summary of report to w: a header
// (timestamp, plus commit and goccy image when set), then one table row per
// result. The notes column shows the compare reason, or the error when there
// is no compare reason. When baseline is non-nil, a final line counts how
// many emulator results passed out of all emulator results. Write errors are
// ignored.
func PrintTextReport(w io.Writer, report RunReport, baseline *BaselineFile) {
        _, _ = fmt.Fprintf(w, "benchmark report @ %s\n", report.Timestamp.Format(time.RFC3339))
        if report.CommitSHA != "" {
                _, _ = fmt.Fprintf(w, "commit: %s\n", report.CommitSHA)
        }
        if report.GoccyImage != "" {
                _, _ = fmt.Fprintf(w, "goccy image: %s\n", report.GoccyImage)
        }

        _, _ = fmt.Fprintf(w, "\n%-24s %-10s %-8s %-10s %-12s %-8s %s\n",
                "case", "target", "outcome", "p50", "route", "rows", "notes")

        // Tally emulator verdicts in the same pass that prints the rows.
        passed, emulatorRows := 0, 0
        for i := range report.Results {
                row := &report.Results[i]
                notes := row.CompareReason
                if notes == "" {
                        notes = row.Error
                }
                _, _ = fmt.Fprintf(w, "%-24s %-10s %-8s %-10s %-12s %-8d %s\n",
                        row.CaseName, row.Target, row.Outcome, row.Latency.P50, row.Route, row.RowCount, notes)

                if row.Target != TargetEmulator {
                        continue
                }
                emulatorRows++
                if row.Pass != nil && *row.Pass {
                        passed++
                }
        }

        if baseline == nil {
                return
        }
        _, _ = fmt.Fprintf(w, "\nemulator vs baseline: %d/%d passed\n", passed, emulatorRows)
}

package runner

import (
        "context"
        "errors"
        "fmt"
        "os"
        "strings"
        "time"
)

// RunOptions configures a benchmark execution.
type RunOptions struct {
        // CasesDir is the directory Run hands to LoadCases.
        CasesDir   string
        // CaseFilter restricts the run to the case with this exact name;
        // empty runs every loaded case.
        CaseFilter string
        // Targets are started in order by Run and each case is executed
        // against every one of them.
        Targets    []Target
        // Timeout is the base per-query timeout; values <= 0 make Run fall
        // back to defaultTimeoutMS.
        Timeout    time.Duration
        // Baseline, when non-nil, is consulted for result-hash checks and
        // (together with Compare) for emulator pass/fail verdicts.
        Baseline   *BaselineFile
        // Compare enables the pass/fail comparison of emulator results
        // against Baseline.
        Compare    bool
        // Progress receives human-readable progress lines as the run
        // advances (target startup, per-case setup, per-iteration
        // completions). nil disables progress output.
        Progress func(format string, args ...any)
}

// logf forwards a progress line to o.Progress and is a no-op when no
// progress callback is configured.
func (o RunOptions) logf(format string, args ...any) {
        if o.Progress == nil {
                return
        }
        o.Progress(format, args...)
}

// Run loads the cases from opts.CasesDir, applies opts.CaseFilter,
// starts every target in opts.Targets, and then executes each case
// against each target, collecting the results into the returned report.
//
// Cleanup of every successfully started target is deferred until Run
// returns. If a target fails to start, Run returns the report built so
// far (metadata only) together with the wrapped start error.
func Run(ctx context.Context, opts RunOptions) (RunReport, error) {
        loaded, err := LoadCases(opts.CasesDir)
        if err != nil {
                return RunReport{}, err
        }
        selected, err := filterCases(loaded, opts.CaseFilter)
        if err != nil {
                return RunReport{}, err
        }

        // Non-positive timeouts fall back to the package default.
        queryTimeout := opts.Timeout
        if queryTimeout <= 0 {
                queryTimeout = time.Duration(defaultTimeoutMS) * time.Millisecond
        }

        report := RunReport{
                Timestamp: time.Now().UTC(),
                CommitSHA: os.Getenv("GITHUB_SHA"),
                Host:      hostname(),
                Targets:   targetNames(opts.Targets),
        }

        for _, tgt := range opts.Targets {
                opts.logf("starting target %s...", tgt.Name())
                began := time.Now()
                if startErr := tgt.Start(ctx); startErr != nil {
                        return report, fmt.Errorf("start %s: %w", tgt.Name(), startErr)
                }
                opts.logf("target %s ready in %s", tgt.Name(), time.Since(began).Round(time.Millisecond))
                // Intentionally deferred to the end of Run (not the end of
                // this iteration): targets must stay up while cases execute.
                defer func(started Target) { _ = started.Cleanup(ctx) }(tgt)
        }

        for idx := range selected {
                runCaseAcrossTargets(ctx, opts, &report, selected, idx, selected[idx], queryTimeout)
        }
        return report, nil
}

// runCaseAcrossTargets runs case c (index ci within cases) against
// every target in opts.Targets and appends exactly one CaseResult per
// target to report.Results.
func runCaseAcrossTargets(
        ctx context.Context,
        opts RunOptions,
        report *RunReport,
        cases []Case,
        ci int,
        c Case,
        timeout time.Duration,
) {
        dataset := datasetForCase(c.Name)
        total := len(cases)
        for _, tgt := range opts.Targets {
                result, shouldRun := prepareCaseRun(ctx, opts, ci, total, c, tgt)
                if shouldRun {
                        opts.logf("[%d/%d] %s on %s: setup...", ci+1, total, c.Name, tgt.Name())
                        result = runCase(ctx, opts, tgt, c, dataset, timeout)
                        // After a goccy failure, re-run its readiness check
                        // (best effort; the error is ignored).
                        if goccy, isGoccy := tgt.(*GoccyTarget); isGoccy && result.Outcome == OutcomeError {
                                _ = goccy.EnsureReady(ctx)
                        }
                        logCaseResult(opts, ci+1, total, c, tgt, result)
                        result = enrichWithBaseline(opts, tgt, c, result)
                }
                report.Results = append(report.Results, result)
        }
}

// prepareCaseRun decides whether case c should be executed on target.
// It returns (result, false) when the case is skipped for this target
// or when a goccy target fails its readiness check; the caller should
// record that result without running. Otherwise it returns a zero
// CaseResult and true.
func prepareCaseRun(
        ctx context.Context,
        opts RunOptions,
        index, total int,
        c Case,
        target Target,
) (CaseResult, bool) {
        // reject builds, logs, and returns the result for a case that
        // will not be executed.
        reject := func(outcome Outcome, detail string) (CaseResult, bool) {
                cr := CaseResult{
                        CaseName:    c.Name,
                        Target:      target.Name(),
                        ContentHash: c.ContentHash,
                        Outcome:     outcome,
                        Error:       detail,
                }
                logCaseResult(opts, index+1, total, c, target, cr)
                return cr, false
        }

        if skipped, reason := c.SkippedFor(target.Name()); skipped {
                return reject(OutcomeSkipped, reason)
        }
        goccy, isGoccy := target.(*GoccyTarget)
        if !isGoccy {
                return CaseResult{}, true
        }
        if err := goccy.EnsureReady(ctx); err != nil {
                return reject(OutcomeError, fmt.Sprintf("goccy not ready: %v", err))
        }
        return CaseResult{}, true
}

// filterCases keeps only the cases whose Name equals name. An empty
// name returns cases unchanged; a name that matches nothing is an
// error. The filter reuses the backing array of cases, so the input
// slice must not be used after the call.
func filterCases(cases []Case, name string) ([]Case, error) {
        if name == "" {
                return cases, nil
        }
        matched := cases[:0]
        for i := range cases {
                if cases[i].Name != name {
                        continue
                }
                matched = append(matched, cases[i])
        }
        if len(matched) > 0 {
                return matched, nil
        }
        return nil, fmt.Errorf("case %q not found", name)
}

// logCaseResult emits a single progress line describing how case c
// ended on target. index is the 1-based position printed in the
// "[index/total]" prefix.
func logCaseResult(opts RunOptions, index, total int, c Case, target Target, cr CaseResult) {
        if cr.Outcome == OutcomeOK {
                opts.logf("[%d/%d] %s on %s: done (p50 %s, %d rows)",
                        index, total, c.Name, target.Name(),
                        cr.Latency.P50.Round(time.Millisecond), cr.RowCount)
                return
        }
        if cr.Outcome == OutcomeSkipped {
                opts.logf("[%d/%d] %s on %s: skipped (%s)",
                        index, total, c.Name, target.Name(), cr.Error)
                return
        }
        // Every other outcome is printed verbatim with its error text.
        opts.logf("[%d/%d] %s on %s: %s (%s)",
                index, total, c.Name, target.Name(), cr.Outcome, cr.Error)
}

// enrichWithBaseline annotates cr using opts.Baseline and returns the
// updated copy. It is a no-op when no baseline is configured.
//
// Two independent steps run in order:
//  1. With opts.Compare set and an emulator target, Pass and
//     CompareReason are filled in (a missing baseline entry counts as a
//     failure), along with BQExecutionP50MS and, when both latencies
//     are positive, Ratio.
//  2. For any target, an OK result whose ResultHash differs from a
//     non-empty baseline hash is downgraded to OutcomeWrongResult; an
//     emulator result that already has a verdict is marked failed.
func enrichWithBaseline(opts RunOptions, target Target, c Case, cr CaseResult) CaseResult {
        if opts.Baseline == nil {
                return cr
        }
        base, hasBase := opts.Baseline.Cases[c.Name]

        if opts.Compare && target.Name() == TargetEmulator {
                if !hasBase {
                        failed := false
                        cr.Pass = &failed
                        cr.CompareReason = "no baseline for case"
                } else {
                        verdict, why := CompareToBaseline(c, base, cr)
                        cr.Pass = &verdict
                        cr.CompareReason = why
                        cr.BQExecutionP50MS = base.LatencyP50MS()
                        baselineMS := base.LatencyP50ForRatio()
                        emulatorMS := cr.CompareLatencyMSForRatio()
                        if baselineMS > 0 && emulatorMS > 0 {
                                cr.Ratio = float64(emulatorMS) / float64(baselineMS)
                        }
                }
        }

        // The hash check only applies when both sides carry a hash.
        hashesComparable := cr.Outcome == OutcomeOK && cr.ResultHash != "" &&
                hasBase && base.ResultHash != ""
        if hashesComparable && base.ResultHash != cr.ResultHash {
                cr.Outcome = OutcomeWrongResult
                if target.Name() == TargetEmulator && cr.Pass != nil {
                        failed := false
                        cr.Pass = &failed
                        cr.CompareReason = "result hash mismatch vs baseline"
                }
        }
        return cr
}

// runCase sets up case c on target, runs its query iterations, and
// folds the samples into a CaseResult. A setup failure yields an
// OutcomeError result without running any query.
func runCase(
        ctx context.Context,
        opts RunOptions,
        target Target,
        c Case,
        dataset string,
        timeout time.Duration,
) CaseResult {
        // BigQuery and goccy targets override the project named by the case.
        project := c.ProjectID
        switch concrete := target.(type) {
        case *GoccyTarget:
                project = goccyProject
        case *BigQueryTarget:
                project = concrete.ProjectID()
        }
        dsRef := datasetRef(target.Name(), project, dataset)

        setupStart := time.Now()
        if err := target.SetupCase(ctx, c, dsRef); err != nil {
                return CaseResult{
                        CaseName:    c.Name,
                        Target:      target.Name(),
                        ContentHash: c.ContentHash,
                        Outcome:     OutcomeError,
                        Error:       err.Error(),
                }
        }
        opts.logf("    %s on %s: setup done in %s, running %d iterations...",
                c.Name, target.Name(), time.Since(setupStart).Round(time.Millisecond), c.Iterations)

        _, query := c.Substitute(dsRef, project)
        samples, execSamples, queueSamples, slotSamples, phaseIters, last, outcome, lastErr := runQueryIterations(
                ctx, opts, target, c, query, c.QueryTimeout(timeout))

        phases := ComputePhaseStats(phaseIters, c.Warmup)
        result := CaseResult{
                CaseName:    c.Name,
                Target:      target.Name(),
                ContentHash: c.ContentHash,
                Outcome:     outcome,
                Error:       lastErr,
                Latency:     ComputeLatencyStats(samples, c.Warmup),
                Phases:      phases,
                EngineP50:   EngineP50FromPhases(phases),
        }
        // The descriptive fields come from the last query that was issued.
        result.Route = last.Route
        result.ResultHash = last.ResultHash
        result.RowCount = last.RowCount
        result.BytesProcessed = last.BytesProcessed

        // Optional medians are only filled in when samples were collected.
        if len(execSamples) > 0 {
                result.ExecutionP50 = ComputeLatencyStats(execSamples, c.Warmup).P50
        }
        if len(queueSamples) > 0 {
                result.QueueP50 = ComputeLatencyStats(queueSamples, c.Warmup).P50
        }
        if len(slotSamples) > 0 {
                result.TotalSlotMsP50 = ComputeInt64P50(slotSamples, c.Warmup)
        }
        return result
}

// runQueryIterations runs query c.Iterations times against target and
// collects the per-iteration measurements. It stops at the first
// failing iteration: a deadline error (from RunQuery or from ctx)
// yields OutcomeTimeout, any other error yields OutcomeError with the
// result's error text (or err.Error() when that is empty). last is the
// result of the most recent RunQuery call, including a failed one.
func runQueryIterations(
        ctx context.Context,
        opts RunOptions,
        target Target,
        c Case,
        query string,
        timeout time.Duration,
) (
        samples, execSamples, queueSamples []time.Duration,
        slotSamples []int64,
        phaseIters []map[string]int64,
        last QueryResult,
        outcome Outcome,
        lastErr string,
) {
        outcome = OutcomeOK
        for iter := 0; iter < c.Iterations; iter++ {
                res, runErr := target.RunQuery(ctx, c, query, timeout)
                last = res
                if runErr != nil {
                        timedOut := errors.Is(runErr, context.DeadlineExceeded) ||
                                ctx.Err() == context.DeadlineExceeded
                        switch {
                        case timedOut:
                                outcome, lastErr = OutcomeTimeout, "timeout"
                        case res.Error != "":
                                outcome, lastErr = OutcomeError, res.Error
                        default:
                                outcome, lastErr = OutcomeError, runErr.Error()
                        }
                        // Leave the iteration loop (this break is outside the switch).
                        break
                }

                logQueryIteration(opts, c, target, iter, res)
                samples = append(samples, res.Elapsed)
                if res.ExecutionValid {
                        execSamples = append(execSamples, res.ExecutionOnly)
                        slotSamples = append(slotSamples, res.SlotMs)
                }
                if res.QueueOnly > 0 {
                        queueSamples = append(queueSamples, res.QueueOnly)
                }
                if len(res.Phases) > 0 {
                        phaseIters = append(phaseIters, res.Phases)
                }
        }
        return samples, execSamples, queueSamples, slotSamples, phaseIters, last, outcome, lastErr
}

// logQueryIteration reports the elapsed time of iteration i (0-based),
// tagging warmup iterations. For the BigQuery target with valid
// execution stats it adds a second line breaking the time down into
// execution, queue, slot milliseconds and client overhead.
func logQueryIteration(opts RunOptions, c Case, target Target, i int, res QueryResult) {
        var warmupTag string
        if i < c.Warmup {
                warmupTag = " (warmup)"
        }
        opts.logf("    %s on %s: iteration %d/%d%s took %s",
                c.Name, target.Name(), i+1, c.Iterations, warmupTag,
                res.Elapsed.Round(time.Millisecond))

        if target.Name() != TargetBigQuery || !res.ExecutionValid {
                return
        }
        // Client overhead is whatever the wall clock saw beyond execution.
        overhead := res.Elapsed - res.ExecutionOnly
        opts.logf("      bq stats: execution=%s queue=%s slot_ms=%d client_overhead=%s",
                res.ExecutionOnly.Round(time.Millisecond),
                res.QueueOnly.Round(time.Millisecond),
                res.SlotMs,
                overhead.Round(time.Millisecond))
}

// datasetForCase derives the dataset ID for a case: the prefix "ds_"
// followed by the case name with every '-' replaced by '_'.
func datasetForCase(name string) string {
        const prefix = "ds_"
        sanitized := strings.ReplaceAll(name, "-", "_")
        return prefix + sanitized
}

// datasetRef returns the dataset reference to use for target:
// "project.dataset" for the BigQuery target, the bare dataset otherwise.
func datasetRef(target TargetName, project, dataset string) string {
        if target != TargetBigQuery {
                return dataset
        }
        return project + "." + dataset
}

// targetNames returns the Name() of every target, in order. The result
// is never nil, even for an empty input.
func targetNames(targets []Target) []TargetName {
        names := make([]TargetName, 0, len(targets))
        for _, tgt := range targets {
                names = append(names, tgt.Name())
        }
        return names
}

// hostname returns os.Hostname(), or "" when the lookup fails.
func hostname() string {
        if name, err := os.Hostname(); err == nil {
                return name
        }
        return ""
}

package runner

import (
        "math"
        "slices"
        "time"
)

// LatencyStats summarizes repeated latency samples (post-warmup).
// ComputeLatencyStats produces it from the samples that remain after
// the warmup iterations are dropped; the zero value is what it returns
// for an empty sample set. The fields are time.Duration values, which
// encoding/json writes as integer nanoseconds.
type LatencyStats struct {
        // Min is the smallest retained sample.
        Min time.Duration `json:"min"`
        // P50 is the median as picked by percentile (nearest rank).
        P50 time.Duration `json:"p50"`
        // P90 is the 90th percentile as picked by percentile.
        P90 time.Duration `json:"p90"`
        // Max is the largest retained sample.
        Max time.Duration `json:"max"`
        // N is the number of retained (post-warmup) samples.
        N   int           `json:"n"`
}

// PhaseStats summarizes per-phase timings across iterations. The map
// key is the phase name; the value is the latency summary of that
// phase's samples (see ComputePhaseStats).
type PhaseStats map[string]LatencyStats

// ComputeLatencyStats summarizes samples once the first warmup entries
// are discarded. warmup is clamped to [0, len(samples)-1], so at least
// one sample always survives; an empty input yields the zero
// LatencyStats. The input slice is not modified.
func ComputeLatencyStats(samples []time.Duration, warmup int) LatencyStats {
        total := len(samples)
        if total == 0 {
                return LatencyStats{}
        }

        first := warmup
        switch {
        case first >= total:
                first = total - 1
        case first < 0:
                first = 0
        }

        // Sort a private copy so the caller's ordering is preserved.
        measured := make([]time.Duration, total-first)
        copy(measured, samples[first:])
        slices.Sort(measured)

        stats := LatencyStats{N: len(measured)}
        stats.Min = measured[0]
        stats.Max = measured[stats.N-1]
        stats.P50 = percentile(measured, 0.50)
        stats.P90 = percentile(measured, 0.90)
        return stats
}

// ComputePhaseStats aggregates phase timings (microseconds) across iterations.
//
// Each element of iterations maps a phase name to that phase's
// duration in microseconds for one iteration. The first warmup
// iterations are ignored; warmup is clamped to [0, len(iterations)-1]
// so at least one iteration is always summarized and a negative warmup
// is treated as zero (matching ComputeLatencyStats and ComputeInt64P50)
// instead of indexing out of range. A phase that is missing from some
// iterations is summarized over the iterations that do report it.
//
// It returns nil when iterations is empty.
func ComputePhaseStats(iterations []map[string]int64, warmup int) PhaseStats {
        if len(iterations) == 0 {
                return nil
        }
        start := warmup
        if start >= len(iterations) {
                start = len(iterations) - 1
        }
        if start < 0 {
                start = 0
        }
        measured := iterations[start:]

        // Collect the union of phase names seen after warmup.
        names := map[string]struct{}{}
        for _, phases := range measured {
                for name := range phases {
                        names[name] = struct{}{}
                }
        }

        out := make(PhaseStats, len(names))
        for name := range names {
                samples := make([]time.Duration, 0, len(measured))
                for _, phases := range measured {
                        if us, ok := phases[name]; ok {
                                samples = append(samples, time.Duration(us)*time.Microsecond)
                        }
                }
                // Warmup was already applied above, so pass 0 here.
                out[name] = ComputeLatencyStats(samples, 0)
        }
        return out
}

// ComputeInt64P50 returns the median (nearest rank, ties rounded up) of
// samples once the first warmup entries are discarded. warmup is
// clamped to [0, len(samples)-1]; an empty input yields 0. The input
// slice is not modified.
func ComputeInt64P50(samples []int64, warmup int) int64 {
        n := len(samples)
        if n == 0 {
                return 0
        }
        first := max(min(warmup, n-1), 0)

        // Sort a private copy so the caller's ordering is preserved.
        kept := make([]int64, n-first)
        copy(kept, samples[first:])
        slices.Sort(kept)

        mid := int(math.Round(0.50 * float64(len(kept)-1)))
        return kept[min(max(mid, 0), len(kept)-1)]
}

// percentile picks the nearest-rank element of sorted for fraction p
// (0.50 for the median). The rank round(p*(len-1)) is clamped into the
// valid index range. sorted must already be in ascending order; an
// empty slice yields 0.
func percentile(sorted []time.Duration, p float64) time.Duration {
        n := len(sorted)
        switch n {
        case 0:
                return 0
        case 1:
                return sorted[0]
        }
        rank := int(math.Round(p * float64(n-1)))
        return sorted[min(max(rank, 0), n-1)]
}

package runner

import "net"

// freePort asks the OS for an unused TCP port by listening on
// 127.0.0.1:0, reads back the assigned port number, and closes the
// listener before returning.
func freePort() (int, error) {
        listener, err := net.Listen("tcp", "127.0.0.1:0")
        if err != nil {
                return 0, err
        }
        defer func() { _ = listener.Close() }()
        tcpAddr := listener.Addr().(*net.TCPAddr)
        return tcpAddr.Port, nil
}

// CLI parsing for the gateway_main binary.
//
// The parser lives in its own file so it can be exercised by unit
// tests (cli_test.go) without forking a process. The output is a
// normalized Config struct that main() turns into a gateway.Options.
//
// # Flag aliasing
//
// Most operator-facing flags accept both the legacy
// underscore-separated name this repository started with
// (`--http_port`) and the hyphen-separated equivalent documented for
// `gateway_main` (`--http-port`); a few, such as
// `--initial-data-dir`, exist only in hyphenated form. Both names target
// the same parsed value; whichever appears last on the command line
// wins, the same way Go's `flag` package handles late overrides for
// any single flag. This keeps existing scripts/Taskfiles working
// while letting operators copy invocation snippets straight from the
// upstream documentation.
//
// A handful of flags additionally accept the names the widely-used
// goccy/bigquery-emulator exposes, so invocation snippets written for
// that emulator keep working here without rewriting:
//
//   - `--port`           -> alias for `--http-port`
//   - `--project`        -> alias for `--project-id`
//   - `--data-from-yaml` -> alias for `--seed-data-file`
//
// These are pure aliases onto the same Config fields; the canonical
// names above remain the primary, `--help`-documented spelling.
//
// # Environment-variable fallbacks
//
// Three settings honor environment variables when the CLI flag is
// not supplied, per the stable compatibility contract in docs/SEEDING.md:
//
//   - BIGQUERY_EMULATOR_INITIAL_DATA_DIR (also EMULATOR_INITIAL_DATA_DIR)
//     populates --initial-data-dir.
//   - BIGQUERY_EMULATOR_SEED_TOKEN populates --seed-api-seed-token.
//   - BIGQUERY_EMULATOR_DATA_DIR populates --data-dir.
//
// BIGQUERY_EMULATOR_SQL_TOOLS_TOKEN is consulted the same way for
// --sql-tools-api-token.
//
// All of these are still overridable from the command line; the env
// vars are only consulted when the operator did not say anything.
package main

import (
        "errors"
        "flag"
        "fmt"
        "io"
        "strconv"
        "strings"
)

// Config is the normalized result of parsing argv. All operator-facing
// CLI flags collapse to one field here so the rest of the program can
// read settings without juggling pointer indirection or alias bookkeeping.
type Config struct {
        // VersionRequested is true when `--version` was passed; main()
        // short-circuits before any side effect.
        VersionRequested bool

        // ListenHost is the host the HTTP gateway binds to. Maps to
        // `--listen-host` / `--hostname`. Defaults to "localhost".
        ListenHost string

        // HTTPPort is the BigQuery REST listener port. Maps to
        // `--http-port` / `--http_port` / `--port`. Defaults to 9050.
        HTTPPort int

        // GRPCPort is the engine gRPC port. The gateway also dials this
        // port via the loopback interface. Maps to `--grpc-port` /
        // `--grpc_port`. Defaults to 9060.
        GRPCPort int

        // EngineBinary is the absolute or basename path to the C++ engine
        // subprocess. Empty disables the subprocess (gateway-only /
        // unit-test mode). Maps to `--engine-binary` / `--engine_binary`.
        // Defaults to "emulator_main".
        EngineBinary string

        // DataDir is the persistent storage root. Forwarded to the engine
        // as `--data_dir`. Maps to `--data-dir` / `--data_dir` /
        // `BIGQUERY_EMULATOR_DATA_DIR`.
        DataDir string

        // LegacyDatabase is the removed recidiviz/goccy `--database` flag
        // (single SQLite file). When set, parseArgs maps it to DataDir =
        // filepath.Dir(LegacyDatabase) and emits a migration warning.
        LegacyDatabase string

        // StartupWarnings holds operator-facing migration / layout notices
        // collected during CLI parsing. main() logs them before binding.
        StartupWarnings []string

        // InitialDataDir is a template directory the gateway copies into
        // DataDir on first start (when DataDir does not yet contain an
        // initialized catalog). Maps to `--initial-data-dir` /
        // `BIGQUERY_EMULATOR_INITIAL_DATA_DIR` / `EMULATOR_INITIAL_DATA_DIR`.
        InitialDataDir string

        // CopyEngineStdout / CopyEngineStderr forward the engine
        // subprocess's stdio to the gateway's. Maps to
        // `--copy-engine-stdout` / `--copy_engine_stdout` /
        // `--copy-engine-stderr` / `--copy_engine_stderr`.
        // CopyEngineStderr defaults to true; CopyEngineStdout to false.
        CopyEngineStdout bool
        CopyEngineStderr bool

        // LogRequests prints each REST request and response. Maps to
        // `--log-requests` / `--log_requests`.
        LogRequests bool

        // Debug enables verbose lifecycle logging. Maps to `--debug`.
        Debug bool

        // DefaultProjectID is the default project clients act against
        // when seeding or applying YAML data without an explicit project.
        // Maps to `--project-id` / `--project_id` / `--project`.
        DefaultProjectID string

        // DefaultDatasetID is the server-level fallback dataset used to
        // resolve unqualified (single-segment) table names when a query
        // or job does not carry its own `defaultDataset`. This mirrors
        // setting `default_dataset` on a production BigQuery client/job so
        // `SELECT * FROM t` / `CREATE TABLE t (...)` resolve to
        // `<project>.<DefaultDatasetID>.t`. Maps to `--dataset` /
        // `--dataset-id` / `--dataset_id`.
        DefaultDatasetID string

        // DefaultDatasetLocation is the BigQuery location stamped on
        // datasets created without an explicit location. Maps to
        // `--default-dataset-location`.
        DefaultDatasetLocation string

        // EnableSeedAPI registers the `POST /api/emulator/seed` route
        // and its operation polling endpoint. Off by default for local
        // safety. Maps to `--enable-seed-api`.
        EnableSeedAPI bool

        // SeedAPIAllowRemote allows non-loopback callers to invoke the
        // seed API. Off by default. Maps to `--seed-api-allow-remote`.
        SeedAPIAllowRemote bool

        // SeedAPISeedToken, when non-empty, requires matching header
        // `X-BigQuery-Emulator-Seed-Token` on every seed API request.
        // Maps to `--seed-api-seed-token`; falls back to
        // `BIGQUERY_EMULATOR_SEED_TOKEN` when the flag is empty.
        SeedAPISeedToken string

        // EnableSQLToolsAPI registers POST /api/emulator/sql/* routes.
        // Maps to `--enable-sql-tools-api`.
        EnableSQLToolsAPI bool

        // SQLToolsAPIAllowRemote allows non-loopback SQL tools callers.
        // Maps to `--sql-tools-api-allow-remote`.
        SQLToolsAPIAllowRemote bool

        // SQLToolsAPISeedToken protects remote SQL tools access. Maps to
        // `--sql-tools-api-token`; falls back to
        // `BIGQUERY_EMULATOR_SQL_TOOLS_TOKEN` when the flag is empty.
        SQLToolsAPISeedToken string

        // SeedFiles is the repeatable list of YAML seed-data files to
        // apply once the engine is SERVING. Maps to `--seed-data-file`
        // / `--seed-yaml` / `--data-from-yaml`.
        SeedFiles []string
}

// envLookup has the same shape as `os.LookupEnv`: it returns the value
// of key and whether the variable is set. parseArgs takes one as a
// parameter so tests can inject a deterministic map instead of
// depending on the running process's environment; passing nil makes
// parseArgs use noEnv, which reports every variable as unset.
type envLookup func(key string) (string, bool)

// parseArgs builds a one-shot flag.FlagSet, registers every supported
// flag (with hyphen and underscore aliases), parses argv, and returns
// the resulting Config. errOut is where the FlagSet writes usage and
// error messages on parse failure -- pass os.Stderr in production,
// or a *bytes.Buffer in tests.
//
// The argv parameter does NOT include the program name; pass
// os.Args[1:] to mirror Go's `flag.Parse()` contract.
//
// getenv supplies the environment used for fallbacks; nil means "no
// environment at all" (see noEnv).
//
// Internally this composes a few steps so the function stays
// readable: defaultConfig seeds the Config with the built-in defaults,
// registerFlags wires every supported flag (including
// hyphen/underscore aliases) onto a one-shot FlagSet, validatePorts
// checks the parsed ports, and applyEnvFallbacks fills in missing
// values from the operator's environment after the FlagSet wins. The
// legacy `--database` flag is then mapped and data-dir layout warnings
// are appended to StartupWarnings.
//
// On any error the returned Config is the zero value.
func parseArgs(argv []string, errOut io.Writer, getenv envLookup) (Config, error) {
        if getenv == nil {
                // Test-friendly default that never reads the real env, so
                // parseArgs is deterministic when callers don't pass a
                // fixture explicitly.
                getenv = noEnv
        }

        cfg := defaultConfig()
        // ContinueOnError makes Parse return the error instead of exiting
        // the process, so it can be wrapped and handed to the caller.
        fs := flag.NewFlagSet("gateway_main", flag.ContinueOnError)
        fs.SetOutput(errOut)

        // --version is parsed into a local because it is not bound to a
        // Config field by registerFlags.
        versionFlag := false
        registerFlags(fs, &cfg, &versionFlag)

        if err := fs.Parse(argv); err != nil {
                return Config{}, fmt.Errorf("parse flags: %w", err)
        }
        cfg.VersionRequested = versionFlag

        // Ports are validated before the environment is consulted; the
        // env fallbacks below do not touch port settings.
        if err := validatePorts(cfg); err != nil {
                return Config{}, err
        }
        applyEnvFallbacks(&cfg, getenv)
        if err := applyLegacyDatabaseFlag(&cfg); err != nil {
                return Config{}, err
        }
        cfg.StartupWarnings = append(cfg.StartupWarnings, collectDataDirLayoutWarnings(cfg.DataDir)...)
        return cfg, nil
}

// noEnv is the envLookup parseArgs falls back to when the caller
// passes nil. It reports every variable as unset, which keeps
// parseArgs deterministic in tests that don't care about env
// fallbacks.
func noEnv(_ string) (string, bool) {
        return "", false
}

// defaultConfig returns the Config parseArgs starts from before any
// flag is applied. Keeping the defaults in one place makes them easy
// to find and lets tests that drive parseArgs directly assert against
// the same baseline. Fields not listed here keep their zero value.
func defaultConfig() Config {
        var cfg Config
        cfg.ListenHost = "localhost"
        cfg.HTTPPort = 9050
        cfg.GRPCPort = 9060
        cfg.EngineBinary = "emulator_main"
        cfg.CopyEngineStderr = true
        return cfg
}

// registerFlags wires every supported flag (including hyphen and
// underscore aliases) onto fs. Split out from parseArgs purely so
// the parser body stays under the funlen budget; nothing else
// invokes it.
//
// Each register* call binds all names in its slice to the same
// destination, so the first name and its aliases share one parsed
// value. cfg supplies both the destinations and the default values
// (whatever the fields hold on entry); versionFlag receives
// `--version`.
func registerFlags(fs *flag.FlagSet, cfg *Config, versionFlag *bool) {
        // Network and engine process settings.
        registerString(fs, &cfg.ListenHost, []string{"listen-host", "hostname"},
                "Hostname for the emulator servers.")
        // "port" is the goccy/bigquery-emulator spelling.
        registerInt(fs, &cfg.HTTPPort, []string{"http-port", "http_port", "port"},
                "Port on which to run the BigQuery REST gateway.")
        registerInt(fs, &cfg.GRPCPort, []string{"grpc-port", "grpc_port"},
                "Port on which to run the internal engine gRPC server.")
        registerString(fs, &cfg.EngineBinary, []string{"engine-binary", "engine_binary"},
                "Path to the C++ engine binary. Empty disables the subprocess.")

        // Storage locations.
        registerString(fs, &cfg.DataDir,
                []string{"data-dir", "data_dir"},
                "Persistent storage root. Passed to the engine as --data_dir. "+
                        "Falls back to $BIGQUERY_EMULATOR_DATA_DIR when empty.")
        registerString(fs, &cfg.LegacyDatabase,
                []string{"database"},
                "DEPRECATED (recidiviz/goccy compat): single SQLite catalog file. "+
                        "Mapped to --data-dir=<parent directory>. Prefer --data-dir.")
        registerString(fs, &cfg.InitialDataDir,
                []string{"initial-data-dir"},
                "Template directory copied into --data-dir on first start when "+
                        "--data-dir is empty. Falls back to $BIGQUERY_EMULATOR_INITIAL_DATA_DIR "+
                        "/ $EMULATOR_INITIAL_DATA_DIR.")

        // Logging and diagnostics.
        registerBool(fs, &cfg.CopyEngineStdout, []string{"copy-engine-stdout", "copy_engine_stdout"},
                "Forward the engine subprocess's stdout to the gateway's.")
        registerBool(fs, &cfg.CopyEngineStderr, []string{"copy-engine-stderr", "copy_engine_stderr"},
                "Forward the engine subprocess's stderr to the gateway's.")
        registerBool(fs, &cfg.LogRequests, []string{"log-requests", "log_requests"},
                "Log every REST request and response.")
        registerBool(fs, &cfg.Debug, []string{"debug"},
                "Enable verbose lifecycle logging.")

        // Project / dataset defaults. "project" is the goccy spelling.
        registerString(fs, &cfg.DefaultProjectID, []string{"project-id", "project_id", "project"},
                "Default BigQuery project clients are assumed to act against.")
        registerString(fs, &cfg.DefaultDatasetID, []string{"dataset", "dataset-id", "dataset_id"},
                "Default dataset used to resolve unqualified table names when a "+
                        "query/job does not set its own defaultDataset (e.g. SELECT * FROM t). "+
                        "Mirrors default_dataset on a production BigQuery client.")
        registerString(fs, &cfg.DefaultDatasetLocation, []string{"default-dataset-location"},
                "Default BigQuery location stamped on datasets created without an "+
                        "explicit location (e.g. US, EU).")

        // Seed API and seed files. "data-from-yaml" is the goccy spelling.
        registerBool(fs, &cfg.EnableSeedAPI, []string{"enable-seed-api"},
                "Register POST /api/emulator/seed and the operation polling endpoint.")
        registerBool(fs, &cfg.SeedAPIAllowRemote, []string{"seed-api-allow-remote"},
                "Allow non-loopback callers to invoke the seed API.")
        registerString(fs, &cfg.SeedAPISeedToken,
                []string{"seed-api-seed-token"},
                "Required value for the X-BigQuery-Emulator-Seed-Token header on every "+
                        "seed request. Falls back to $BIGQUERY_EMULATOR_SEED_TOKEN.")
        registerStringSlice(fs, &cfg.SeedFiles, []string{"seed-data-file", "seed-yaml", "data-from-yaml"},
                "YAML seed-data file to apply once the engine reports SERVING (repeatable).")

        // SQL tools API.
        registerBool(fs, &cfg.EnableSQLToolsAPI, []string{"enable-sql-tools-api"},
                "Register POST /api/emulator/sql/* parser/formatter/completion routes.")
        registerBool(fs, &cfg.SQLToolsAPIAllowRemote, []string{"sql-tools-api-allow-remote"},
                "Allow non-loopback callers to invoke the SQL tools API.")
        registerString(fs, &cfg.SQLToolsAPISeedToken,
                []string{"sql-tools-api-token"},
                "Required value for the X-BigQuery-Emulator-SqlTools-Token header on every "+
                        "SQL tools request. Falls back to $BIGQUERY_EMULATOR_SQL_TOOLS_TOKEN.")

        registerBool(fs, versionFlag, []string{"version"},
                "Print version information (semver + git commit + build date + Go toolchain) and exit.")
}

// validatePorts checks the HTTP and gRPC listener ports carried by cfg.
// Each port must lie in 1..65535 and the two must not be equal; the
// first violation found is returned as an error, otherwise nil. It is
// kept separate from parseArgs so the checks can be tested directly.
//
// NOTE(review): engineInternalGRPCPort derives the engine listener as
// GRPCPort+1 and that derived port is not range-checked here — confirm
// whether --grpc-port 65535 should be rejected when an engine is spawned.
func validatePorts(cfg Config) error {
        inRange := func(port int) bool { return port >= 1 && port <= 65535 }

        switch {
        case !inRange(cfg.HTTPPort):
                return fmt.Errorf("invalid --http-port %d: must be in 1..65535", cfg.HTTPPort)
        case !inRange(cfg.GRPCPort):
                return fmt.Errorf("invalid --grpc-port %d: must be in 1..65535", cfg.GRPCPort)
        case cfg.HTTPPort == cfg.GRPCPort:
                return fmt.Errorf("--http-port and --grpc-port must differ (both %d)", cfg.HTTPPort)
        }
        return nil
}

// applyEnvFallbacks fills in config fields the command line left empty
// from the environment (flag > env > nothing). cfg is mutated in place
// and getenv is consulted in the order the variables are listed below.
//
// The initial-data-dir lookup tries two variable names and skips a
// variable that is set but empty; the other fields take whatever the
// single variable holds when it is present.
func applyEnvFallbacks(cfg *Config, getenv envLookup) {
        // fill copies the named variable into dst only when dst is still
        // empty and the variable is present in the environment.
        fill := func(dst *string, name string) {
                if *dst != "" {
                        return
                }
                if val, found := getenv(name); found {
                        *dst = val
                }
        }

        fill(&cfg.DataDir, "BIGQUERY_EMULATOR_DATA_DIR")

        if cfg.InitialDataDir == "" {
                candidates := [...]string{
                        "BIGQUERY_EMULATOR_INITIAL_DATA_DIR",
                        "EMULATOR_INITIAL_DATA_DIR",
                }
                // Stop at the first candidate that yields a non-empty value.
                for i := 0; i < len(candidates) && cfg.InitialDataDir == ""; i++ {
                        if val, found := getenv(candidates[i]); found && val != "" {
                                cfg.InitialDataDir = val
                        }
                }
        }

        fill(&cfg.SeedAPISeedToken, "BIGQUERY_EMULATOR_SEED_TOKEN")
        fill(&cfg.SQLToolsAPISeedToken, "BIGQUERY_EMULATOR_SQL_TOOLS_TOKEN")
}

// engineInternalGRPCPort returns the port used for the engine
// subprocess's listener: one above --grpc-port. The public BigQuery
// Storage shim binds --grpc-port itself, so the two can coexist.
func (c Config) engineInternalGRPCPort() int {
        const offset = 1
        return c.GRPCPort + offset
}

// ToOptions turns the parsed CLI config into the addresses and engine
// arguments the gateway runtime consumes.
//
//   - httpAddr is ListenHost:HTTPPort.
//   - storageGRPCAddr is ListenHost:GRPCPort, the address client
//     libraries dial via BIGQUERY_STORAGE_GRPC_ENDPOINT.
//   - engineAddr is ListenHost:<engine internal port>, the internal
//     bigquery_emulator.v1 listener the gateway dials; it stays empty
//     when engineBinary is "".
//   - engineArgs is the pass-through flag list from engineCLIArgs.
func (c Config) ToOptions(engineBinary string) (httpAddr, storageGRPCAddr, engineAddr string, engineArgs []string) {
        hostPort := func(port int) string {
                return c.ListenHost + ":" + strconv.Itoa(port)
        }

        if engineBinary != "" {
                engineAddr = hostPort(c.engineInternalGRPCPort())
        }
        return hostPort(c.HTTPPort), hostPort(c.GRPCPort), engineAddr, c.engineCLIArgs()
}

// engineCLIArgs builds the flat `--key value` argument list forwarded
// to the engine. Flags are always emitted in the double-hyphen,
// underscore-separated spelling (`--data_dir foo`) regardless of which
// alias the operator used on the gateway side.
//
// A flag whose value is empty is left out entirely, so the engine
// falls back to its own default for anything the operator did not set.
func (c Config) engineCLIArgs() []string {
        passthrough := []struct{ name, value string }{
                {name: "--data_dir", value: c.DataDir},
        }

        out := make([]string, 0, 2*len(passthrough))
        for i := range passthrough {
                if entry := passthrough[i]; entry.value != "" {
                        out = append(out, entry.name, entry.value)
                }
        }
        return out
}

// registerString binds one string variable to every flag name in
// names. names[0] is the primary flag and carries desc in `--help`;
// each later name is registered with a short "(alias for --<primary>)"
// usage line. The current value of *target becomes the default of
// every flag. An empty names slice registers nothing.
func registerString(fs *flag.FlagSet, target *string, names []string, desc string) {
        for i, name := range names {
                usage := desc
                if i > 0 {
                        usage = "(alias for --" + names[0] + ")"
                }
                fs.StringVar(target, name, *target, usage)
        }
}

// registerInt binds one int variable to every flag name in names,
// following the same primary/alias convention as registerString. The
// flags are registered with fs.IntVar, so a malformed value is reported
// by the flag package when the FlagSet is parsed. An empty names slice
// registers nothing.
func registerInt(fs *flag.FlagSet, target *int, names []string, desc string) {
        for i, name := range names {
                usage := desc
                if i > 0 {
                        usage = "(alias for --" + names[0] + ")"
                }
                fs.IntVar(target, name, *target, usage)
        }
}

// registerBool binds one bool variable to every flag name in names,
// following the same primary/alias convention as registerString. All
// names are registered with fs.BoolVar, so each one accepts the bare
// `--name` form as well as `--name=true` / `--name=false`. An empty
// names slice registers nothing.
func registerBool(fs *flag.FlagSet, target *bool, names []string, desc string) {
        for i, name := range names {
                usage := desc
                if i > 0 {
                        usage = "(alias for --" + names[0] + ")"
                }
                fs.BoolVar(target, name, *target, usage)
        }
}

// registerStringSlice registers a repeatable string flag under every
// name in names. The standard flag package has no built-in repeatable
// flag, so all names share a single stringSliceValue that appends to
// *target each time any of them appears on the command line. The
// primary/alias usage convention matches registerString. An empty
// names slice registers nothing.
func registerStringSlice(fs *flag.FlagSet, target *[]string, names []string, desc string) {
        shared := &stringSliceValue{target: target}
        for i, name := range names {
                usage := desc
                if i > 0 {
                        usage = "(alias for --" + names[0] + ")"
                }
                fs.Var(shared, name, usage)
        }
}

// stringSliceValue adapts a *[]string to flag.Value for
// registerStringSlice. Every Set call appends its argument, so the
// target slice ends up in the order the operator supplied the values.
type stringSliceValue struct {
        target *[]string
}

// String renders the collected values joined by commas. A nil receiver
// or nil target yields the empty string.
func (s *stringSliceValue) String() string {
        if s != nil && s.target != nil {
                return strings.Join(*s.target, ",")
        }
        return ""
}

// Set appends v to the target slice. It returns an error when the
// receiver or its target is nil.
func (s *stringSliceValue) Set(v string) error {
        if s == nil || s.target == nil {
                return errors.New("stringSliceValue: nil target")
        }
        dst := s.target
        *dst = append(*dst, v)
        return nil
}

// gateway_main is the BigQuery emulator's REST gateway entry point.
//
// It is structurally analogous to cloud-spanner-emulator's gateway_main:
// the C++ engine (emulator_main) implements SQL semantics on top of
// GoogleSQL, and this Go binary fronts it with a BigQuery-shaped REST API.
// On startup the gateway spawns the engine as a subprocess and shuts it
// down on exit.
//
// # CLI surface
//
// The parser is broken out into cli.go and supports both the legacy
// underscore-separated flag names this repository started with
// (`--http_port`) and the hyphen-separated equivalents
// (`--http-port`) documented for gateway_main. Every
// new operator-facing flag (data dir, engine pass-through, seed API,
// seed YAML files) is registered there; this file only wires the
// parsed Config into the gateway runtime.
package main

import (
        "errors"
        "flag"
        "fmt"
        "io"
        "log" //nolint:depguard // process-launch + version-print error paths use stdlib log; gateway runtime emits structured slog via opts.Logger
        "log/slog"
        "os"
        "path"
        "path/filepath"
        "runtime"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway"
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
        "github.com/vantaboard/bigquery-emulator/gateway/seedfile"
        "github.com/vantaboard/bigquery-emulator/gateway/storagetmpl"
)

// logStartupWarnings emits the notices gathered during CLI parsing.
// Messages are handled in order: a message starting with "ERROR: "
// stops processing and is returned as an error (with the prefix
// stripped), so a bad --data-dir layout fails startup instead of being
// silently accepted; every other message is written through the
// standard logger. Messages after the first error are not logged.
func logStartupWarnings(warnings []string) error {
        for i := range warnings {
                detail, fatal := strings.CutPrefix(warnings[i], "ERROR: ")
                if !fatal {
                        log.Print(warnings[i])
                        continue
                }
                return fmt.Errorf("%s", detail)
        }
        return nil
}

// Version metadata. The defaults (`dev` / `none` / `unknown`) are what a
// plain `go build` produces; release builds replace them via
// `-X main.version=... -X main.commit=... -X main.date=...` ldflags
// (see `.goreleaser.yml` and `taskfiles/emulator.yml`'s `gateway:build`
// helper). Keep these as `var` (not `const`) so the linker can overwrite
// them — `const string` cannot be ldflag-injected.
var (
        version = "dev"     // shown on the title line written by printVersion
        commit  = "none"    // shown on the "commit:" row written by printVersion
        date    = "unknown" // shown on the "built:" row written by printVersion
)

// printVersion writes the version block to w: a title line carrying
// the version, followed by indented `key: value` rows for the commit,
// build date, Go toolchain and OS/architecture. Taking an io.Writer
// lets tests capture the output in a buffer instead of forking a
// process. The layout follows cloud-spanner-emulator's
// `gateway_main --version` so the two emulators read alike.
func printVersion(w io.Writer) {
        // A write can fail (for example when the reader side of a pipe
        // closes early), but the process is about to exit and nothing
        // useful can be done about it, so write errors are discarded in
        // one place.
        emit := func(format string, args ...any) {
                _, _ = fmt.Fprintf(w, format, args...)
        }

        emit("bigquery-emulator-gateway version %s\n", version)
        emit("  commit:  %s\n", commit)
        emit("  built:   %s\n", date)
        emit("  go:      %s\n", runtime.Version())
        emit("  os/arch: %s/%s\n", runtime.GOOS, runtime.GOARCH)
}

// resolveEngineBinary locates the engine executable, mirroring the
// resolution logic from cloud-spanner-emulator.
//
//   - An empty name means the engine is disabled; "" is returned.
//   - An absolute name is returned unchanged, without checking that it
//     exists.
//   - Any other name is looked up first in the directory holding the
//     gateway binary and then in that directory's parent; the first
//     candidate that os.Stat accepts is returned.
//
// The process is terminated via log.Fatalf when the gateway's own
// executable path cannot be determined or when neither candidate exists.
func resolveEngineBinary(name string) string {
        if name == "" {
                return ""
        }
        // filepath.IsAbs applies the host OS rules, which path.IsAbs
        // (slash-only) does not. path.IsAbs is still consulted so that
        // slash-rooted names remain accepted exactly as before on every
        // platform.
        if filepath.IsAbs(name) || path.IsAbs(name) {
                return name
        }

        gwPath, err := os.Executable()
        if err != nil {
                log.Fatalf("could not resolve own executable path: %v", err)
        }
        gwDir := filepath.Dir(gwPath)

        for _, dir := range []string{gwDir, filepath.Dir(gwDir)} {
                candidate := filepath.Join(dir, name)
                if _, err := os.Stat(candidate); err == nil {
                        return candidate
                }
        }
        log.Fatalf("could not locate engine binary %q in %q or its parent",
                name, gwDir)
        return "" // unreachable: log.Fatalf exits; required by the compiler
}

// main parses the command line and starts the gateway.
//
// A help request (flag.ErrHelp from parseArgs) exits with status 0 and
// any other parse error is fatal. When the parsed config asks for the
// version, the version block is printed to stdout and the process
// returns. Otherwise the startup warnings are processed and the
// gateway is run; an error from either step is fatal.
func main() {
        cfg, err := parseArgs(os.Args[1:], os.Stderr, os.LookupEnv)
        switch {
        case err == nil:
                // Parsed cleanly; carry on.
        case errors.Is(err, flag.ErrHelp):
                os.Exit(0)
        default:
                log.Fatal(err)
        }

        if cfg.VersionRequested {
                printVersion(os.Stdout)
                return
        }

        err = logStartupWarnings(cfg.StartupWarnings)
        if err == nil {
                err = runGateway(cfg)
        }
        if err != nil {
                log.Fatal(err)
        }
}

// runGateway builds gateway.Options from the parsed CLI config,
// installs the startup hooks and runs the gateway, returning whatever
// Run returns.
//
// The structured logger writes text to stderr at Info level, or Debug
// level when cfg.Debug is set. Two hooks are attached:
//
//   - the pre-start hook hands InitialDataDir and DataDir to
//     storagetmpl.MaybeMaterialize;
//   - the post-engine hook applies the configured seed files through
//     the engine client's catalog, and does nothing when no seed files
//     were given or no engine client is available.
func runGateway(cfg Config) error {
        log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)

        httpAddr, storageGRPCAddr, engineAddr, engineArgs := cfg.ToOptions(cfg.EngineBinary)

        handlerOpts := &slog.HandlerOptions{Level: slog.LevelInfo}
        if cfg.Debug {
                handlerOpts.Level = slog.LevelDebug
        }
        logger := slog.New(slog.NewTextHandler(os.Stderr, handlerOpts))

        opts := gateway.Options{
                // Listener addresses and engine process.
                HTTPAddress:        httpAddr,
                StorageGRPCAddress: storageGRPCAddr,
                EngineAddress:      engineAddr,
                EngineBinary:       resolveEngineBinary(cfg.EngineBinary),
                EngineArgs:         engineArgs,
                CopyEngineStdout:   cfg.CopyEngineStdout,
                CopyEngineStderr:   cfg.CopyEngineStderr,

                // Request handling defaults.
                LogRequests:            cfg.LogRequests,
                DefaultProjectID:       cfg.DefaultProjectID,
                DefaultDatasetID:       cfg.DefaultDatasetID,
                DefaultDatasetLocation: cfg.DefaultDatasetLocation,

                // Seed API.
                EnableSeedAPI:      cfg.EnableSeedAPI,
                SeedAPIAllowRemote: cfg.SeedAPIAllowRemote,
                SeedAPISeedToken:   cfg.SeedAPISeedToken,

                // SQL tools API.
                EnableSQLToolsAPI:      cfg.EnableSQLToolsAPI,
                SQLToolsAPIAllowRemote: cfg.SQLToolsAPIAllowRemote,
                SQLToolsAPISeedToken:   cfg.SQLToolsAPISeedToken,

                // Storage and seeding.
                SeedFiles:      cfg.SeedFiles,
                DataDir:        cfg.DataDir,
                InitialDataDir: cfg.InitialDataDir,

                // Diagnostics.
                Debug:  cfg.Debug,
                Logger: logger,
        }

        materialize := func(o gateway.Options) error {
                return storagetmpl.MaybeMaterialize(o.InitialDataDir, o.DataDir)
        }
        applySeeds := func(o gateway.Options, ec *engine.Client) error {
                if ec == nil || len(o.SeedFiles) == 0 {
                        return nil
                }
                applier := seed.NewCatalogApplier(ec.Catalog)
                return seedfile.ApplyFiles(o.SeedFiles, applier, gateway.DefaultsFromOptions(o))
        }

        return gateway.New(opts).
                WithPreStartHook(materialize).
                WithPostEngineHook(applySeeds).
                Run()
}

// Legacy --database flag handling and data-dir layout warnings for operators
// migrating from the recidiviz/goccy single-file SQLite catalog to this
// emulator's directory-based DuckDB layout.
package main

import (
        "fmt"
        "os"
        "path/filepath"
        "strings"
)

// legacyDatabaseMigrationDoc is the documentation reference appended to
// the --database / --data-dir migration errors and warnings built in
// this file.
const legacyDatabaseMigrationDoc = "docs/REST_API.md#persistence-and-data-dir"

// applyLegacyDatabaseFlag translates the removed recidiviz/goccy
// --database=file.db flag into --data-dir=<directory containing the file>.
//
// It is a no-op when --database was not given. Supplying --database
// together with --data-dir is rejected with an error that spells out
// the replacement flag. Otherwise cfg.DataDir is set to the parent
// directory of the legacy path and a deprecation notice explaining the
// mapping is appended to cfg.StartupWarnings.
func applyLegacyDatabaseFlag(cfg *Config) error {
        legacy := cfg.LegacyDatabase
        if legacy == "" {
                return nil
        }

        parent := filepath.Dir(legacy)
        if cfg.DataDir != "" {
                return fmt.Errorf(
                        "cannot use both --database and --data-dir; replace --database=%q with --data-dir=%q (see %s)",
                        legacy,
                        parent,
                        legacyDatabaseMigrationDoc,
                )
        }

        cfg.DataDir = parent
        notice := fmt.Sprintf(
                "DEPRECATED: --database is removed. The recidiviz/goccy fork stored catalog state "+
                        "in a single SQLite file (%q); this emulator persists under a directory "+
                        "(--data-dir) with catalog.duckdb and sidecar parquet/meta.json files. "+
                        "Mapped --database -> --data-dir=%q. Data in the old single-file format is "+
                        "not automatically loaded; mount the volume at --data-dir and migrate or "+
                        "re-seed if needed. See %s.",
                legacy,
                parent,
                legacyDatabaseMigrationDoc,
        )
        cfg.StartupWarnings = append(cfg.StartupWarnings, notice)
        return nil
}

// collectDataDirLayoutWarnings inspects dataDir on disk and returns
// notices about layouts that suggest a legacy single-file catalog.
//
// An empty or not-yet-existing dataDir yields no notices. A stat or
// read failure yields a single "WARN:" notice. A dataDir that is a
// regular file rather than a directory yields a single "ERROR:" notice.
// Otherwise the directory's top level is scanned and the result is
// handed to legacyDataDirWarnings.
func collectDataDirLayoutWarnings(dataDir string) []string {
        if dataDir == "" {
                return nil
        }

        info, err := os.Stat(dataDir)
        switch {
        case err == nil:
                // Path exists; inspect it below.
        case os.IsNotExist(err):
                return nil
        default:
                return []string{fmt.Sprintf("WARN: cannot stat --data-dir %q: %v", dataDir, err)}
        }

        if !info.IsDir() {
                notice := fmt.Sprintf(
                        "ERROR: --data-dir %q is a file, not a directory. The recidiviz/goccy "+
                                "--database=/path/catalog.db flag pointed at a single SQLite file; "+
                                "this emulator expects --data-dir=/parent/directory. See %s.",
                        dataDir,
                        legacyDatabaseMigrationDoc,
                )
                return []string{notice}
        }

        hasCatalog, legacyDB, readErr := scanDataDirRoot(dataDir)
        if readErr == "" {
                return legacyDataDirWarnings(dataDir, hasCatalog, legacyDB)
        }
        return []string{readErr}
}

// scanDataDirRoot lists the top level of dataDir (subdirectories are
// skipped, not descended into) and reports:
//
//   - hasCatalog: whether a file named exactly "catalog.duckdb" exists;
//   - legacyDB: the names of files whose extension, compared
//     case-insensitively, is .db, .sqlite or .sqlite3, in directory
//     listing order;
//   - readErr: a "WARN:" message when the directory cannot be read, in
//     which case the other two results are zero values; "" otherwise.
func scanDataDirRoot(dataDir string) (hasCatalog bool, legacyDB []string, readErr string) {
        entries, err := os.ReadDir(dataDir)
        if err != nil {
                readErr = fmt.Sprintf("WARN: cannot read --data-dir %q: %v", dataDir, err)
                return false, nil, readErr
        }

        for _, entry := range entries {
                if entry.IsDir() {
                        continue
                }
                fileName := entry.Name()
                if fileName == "catalog.duckdb" {
                        hasCatalog = true
                        continue
                }
                switch filepath.Ext(strings.ToLower(fileName)) {
                case ".db", ".sqlite", ".sqlite3":
                        legacyDB = append(legacyDB, fileName)
                }
        }
        return hasCatalog, legacyDB, ""
}

// legacyDataDirWarnings turns the result of a data-dir scan into at
// most one "WARN:" notice. No legacy database files means no notice.
// When legacy files sit next to catalog.duckdb the notice says they are
// ignored and can be deleted; when there is no catalog.duckdb the
// notice says the old state is not loaded automatically and points at
// the migration documentation.
func legacyDataDirWarnings(dataDir string, hasCatalog bool, legacyDB []string) []string {
        switch {
        case len(legacyDB) == 0:
                return nil
        case hasCatalog:
                notice := fmt.Sprintf(
                        "WARN: --data-dir %q contains legacy single-file database(s) %v alongside "+
                                "catalog.duckdb; the old SQLite files are ignored. Safe to delete after "+
                                "confirming catalog.duckdb has your data.",
                        dataDir,
                        legacyDB,
                )
                return []string{notice}
        default:
                notice := fmt.Sprintf(
                        "WARN: --data-dir %q contains file(s) %v that look like the recidiviz/goccy "+
                                "single-file SQLite catalog, but no catalog.duckdb from this emulator. "+
                                "State from the old format is not loaded automatically; point --data-dir "+
                                "at an empty directory or re-seed. See %s.",
                        dataDir,
                        legacyDB,
                        legacyDatabaseMigrationDoc,
                )
                return []string{notice}
        }
}

// differential-record captures production BigQuery output for the differential corpus.
package main

import (
        "context"
        "encoding/json"
        "errors"
        "flag"
        "fmt"
        "os"
        "os/signal"
        "path/filepath"
        "strconv"
        "strings"
        "syscall"
        "time"

        "cloud.google.com/go/bigquery"
        "github.com/vantaboard/bigquery-emulator/conformance/differential"
        "github.com/vantaboard/bigquery-emulator/conformance/runner"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "google.golang.org/api/iterator"
)

// main runs the recorder. On failure the error is reported on stderr
// and the process exits with status 2.
func main() {
        err := run()
        if err == nil {
                return
        }
        _, _ = fmt.Fprintln(os.Stderr, "differential-record:", err)
        os.Exit(2)
}

// run parses the command line, loads the corpus and records one oracle
// per corpus case against production BigQuery.
//
// The project comes from --project or, failing that, from the
// BIGQUERY_DIFFERENTIAL_PROJECT environment variable. When neither is
// set the skip instructions are printed and run returns nil. A help
// request also returns nil. Recording stops at the first failing case,
// whose name is prefixed to the returned error.
//
// SIGINT and SIGTERM cancel the context handed to every BigQuery call.
func run() error {
        fs := flag.NewFlagSet("differential-record", flag.ContinueOnError)
        fs.SetOutput(os.Stderr)
        corpus := fs.String("corpus", differential.DefaultCorpusDir, "corpus directory or single YAML")
        oracleDir := fs.String("oracle-dir", differential.DefaultOracleDir, "directory to write oracle JSON files")
        project := fs.String("project", "", "GCP project (default: BIGQUERY_DIFFERENTIAL_PROJECT)")
        dryRun := fs.Bool("dry-run", false, "print actions without writing oracle files")
        if err := fs.Parse(os.Args[1:]); err != nil {
                if errors.Is(err, flag.ErrHelp) {
                        return nil
                }
                return err
        }

        projectID := strings.TrimSpace(*project)
        if projectID == "" {
                projectID = strings.TrimSpace(os.Getenv("BIGQUERY_DIFFERENTIAL_PROJECT"))
        }
        if projectID == "" {
                printSkipInstructions()
                return nil
        }

        cases, err := differential.LoadCorpusDir(*corpus, false)
        if err != nil {
                return err
        }

        // NotifyContext cancels ctx on the first signal and unregisters the
        // handler in stop, replacing a hand-rolled goroutine that was never
        // joined when no signal arrived.
        ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
        defer stop()

        client, err := bigquery.NewClient(ctx, projectID)
        if err != nil {
                return fmt.Errorf("bigquery client: %w", err)
        }
        // Close errors are not actionable at exit; ignore them deliberately.
        defer func() { _ = client.Close() }()

        for _, c := range cases {
                if err := recordCase(ctx, client, projectID, *oracleDir, c, *dryRun); err != nil {
                        return fmt.Errorf("%s: %w", c.Name, err)
                }
        }
        return nil
}

// printSkipInstructions tells the operator, on stderr, why recording
// was skipped and how to configure a GCP project or pin oracle
// expectations without one.
func printSkipInstructions() {
        const notice = `differential-record: skipped — no GCP project configured.

Set BIGQUERY_DIFFERENTIAL_PROJECT to a project where you can create ephemeral
datasets, or pass --project=<id>. Application Default Credentials must be
available (gcloud auth application-default login).

The committed oracle JSON under conformance/differential/oracle/ is what CI
replays; recording is manual/opt-in. When GCP access is unavailable, pin
oracle expectations from bq query output and mark oracle_source: bq-cli in
the corpus YAML (see .cursor/rules/conformance-bq-validation.mdc).`

        _, _ = fmt.Fprintln(os.Stderr, notice)
}

// recordCase records the oracle for a single corpus case. It creates an
// ephemeral dataset named diff_record_<sanitized case name>_<unix time>
// in location US, applies the case's setup steps, captures the query
// result, and deletes the ephemeral dataset (with contents) on the way
// out.
//
// The deletion runs on its own context with a timeout rather than on
// ctx: ctx is cancelled when the operator interrupts the run, and a
// cancelled context would make the cleanup call fail and leave the
// dataset behind. A failed deletion is reported on stderr so the
// leftover dataset can be removed by hand; it does not change the
// returned error.
//
// NOTE(review): datasets created by `dataset` setup steps in applySetup
// are not removed here — confirm whether they need cleanup as well.
func recordCase(
        ctx context.Context,
        client *bigquery.Client,
        project, oracleDir string,
        c *differential.CorpusCase,
        dryRun bool,
) error {
        const cleanupTimeout = 2 * time.Minute

        dsID := fmt.Sprintf("diff_record_%s_%d", sanitizeDatasetID(c.Name), time.Now().Unix())
        ds := client.Dataset(dsID)
        if err := ds.Create(ctx, &bigquery.DatasetMetadata{Location: "US"}); err != nil {
                return fmt.Errorf("create dataset %s: %w", dsID, err)
        }
        defer func() {
                cleanupCtx, cancel := context.WithTimeout(context.Background(), cleanupTimeout)
                defer cancel()
                if err := ds.DeleteWithContents(cleanupCtx); err != nil {
                        _, _ = fmt.Fprintf(os.Stderr,
                                "differential-record: could not delete dataset %s: %v\n", dsID, err)
                }
        }()

        if err := applySetup(ctx, client, project, dsID, c); err != nil {
                return err
        }
        return captureQueryOracle(ctx, client, project, dsID, oracleDir, c, dryRun)
}

// captureQueryOracle runs the case's query against BigQuery and writes
// the outcome as an oracle.
//
// The query runs with project as its default project and dsID as its
// default dataset, unless the case names its own default dataset. Query
// parameters from the case are attached when present. A job that
// finishes with an error is recorded as an unsuccessful oracle carrying
// the error message; a successful job is recorded with its schema,
// rows and a job reference. Failures to start, wait for, or read the
// job are returned as errors and nothing is written.
func captureQueryOracle(
        ctx context.Context,
        client *bigquery.Client,
        project, dsID, oracleDir string,
        c *differential.CorpusCase,
        dryRun bool,
) error {
        query := client.Query(c.Query)
        query.DefaultProjectID = project
        if c.DefaultDataset != "" {
                query.DefaultDatasetID = c.DefaultDataset
        } else {
                query.DefaultDatasetID = dsID
        }
        if len(c.QueryParameters) > 0 {
                query.Parameters = toBQParams(c.QueryParameters)
        }

        job, err := query.Run(ctx)
        if err != nil {
                return fmt.Errorf("run query: %w", err)
        }
        status, err := job.Wait(ctx)
        if err != nil {
                return fmt.Errorf("wait job: %w", err)
        }

        // Fields shared by the failure and success oracles; Success stays
        // false until the rows have been read.
        oracle := &differential.Oracle{
                Project:      project,
                OracleSource: "recorded",
                Match:        c.Match,
                JobID:        job.ID(),
        }

        if jobErr := status.Err(); jobErr != nil {
                oracle.Error = &differential.OracleError{Message: jobErr.Error()}
                return writeOracle(oracleDir, c, oracle, dryRun)
        }

        it, err := job.Read(ctx)
        if err != nil {
                return fmt.Errorf("read results: %w", err)
        }
        schema, rows, err := readAllRows(it)
        if err != nil {
                return err
        }

        oracle.Success = true
        oracle.Schema, oracle.Rows = schema, rows
        oracle.JobReference = &bqtypes.JobReference{
                ProjectID: project,
                JobID:     oracle.JobID,
                Location:  "US",
        }
        return writeOracle(oracleDir, c, oracle, dryRun)
}

// writeOracle stores o at oracleDir/<c.OracleRef>. In dry-run mode it
// only announces the target path on stderr and writes nothing.
//
// The directory that will contain the file is created first. That is
// the parent of the joined path rather than oracleDir itself, so an
// OracleRef that includes a subdirectory has somewhere to land.
func writeOracle(oracleDir string, c *differential.CorpusCase, o *differential.Oracle, dryRun bool) error {
        oraclePath := filepath.Join(oracleDir, c.OracleRef)
        if dryRun {
                _, _ = fmt.Fprintf(os.Stderr, "would write oracle %s for %s\n", oraclePath, c.Name)
                return nil
        }
        if err := os.MkdirAll(filepath.Dir(oraclePath), 0o750); err != nil {
                return fmt.Errorf("create oracle directory for %s: %w", oraclePath, err)
        }
        return differential.WriteOracle(oraclePath, o)
}

// applySetup executes the case's setup steps in order against
// BigQuery. Each step is dispatched on the first of these that is set:
//
//   - Dataset: create that dataset in location US;
//   - Table: create the table via createTable;
//   - Rows: insert the rows via insertRows;
//   - SQL (non-blank): run the statement with project and dsID as the
//     default project and dataset, and wait for it to finish.
//
// A step with none of these set is rejected. The first failure is
// returned with the step index in the message.
func applySetup(ctx context.Context, client *bigquery.Client, project, dsID string, c *differential.CorpusCase) error {
        for idx, step := range c.Setup {
                switch {
                case step.Dataset != "":
                        err := client.Dataset(step.Dataset).Create(ctx, &bigquery.DatasetMetadata{Location: "US"})
                        if err != nil {
                                return fmt.Errorf("setup[%d] dataset %s: %w", idx, step.Dataset, err)
                        }

                case step.Table != nil:
                        if err := createTable(ctx, client, step.Table); err != nil {
                                return fmt.Errorf("setup[%d] table: %w", idx, err)
                        }

                case step.Rows != nil:
                        if err := insertRows(ctx, client, step.Rows); err != nil {
                                return fmt.Errorf("setup[%d] rows: %w", idx, err)
                        }

                case strings.TrimSpace(step.SQL) != "":
                        query := client.Query(step.SQL)
                        query.DefaultProjectID = project
                        query.DefaultDatasetID = dsID

                        job, err := query.Run(ctx)
                        if err != nil {
                                return fmt.Errorf("setup[%d] sql run: %w", idx, err)
                        }
                        status, err := job.Wait(ctx)
                        if err != nil {
                                return fmt.Errorf("setup[%d] sql wait: %w", idx, err)
                        }
                        if jobErr := status.Err(); jobErr != nil {
                                return fmt.Errorf("setup[%d] sql error: %w", idx, jobErr)
                        }

                default:
                        return fmt.Errorf("setup[%d]: unsupported step for recorder", idx)
                }
        }
        return nil
}

// createTable creates the table or view described by t. A view
// definition takes precedence: when t.View is set only the view query
// is sent; otherwise the schema is sent when t.Schema is non-empty.
func createTable(ctx context.Context, client *bigquery.Client, t *runner.TableSetup) error {
        var meta bigquery.TableMetadata
        switch {
        case t.View != nil:
                meta.ViewQuery = t.View.Query
        case len(t.Schema) > 0:
                meta.Schema = toBQSchema(t.Schema)
        }
        return client.Dataset(t.Dataset).Table(t.ID).Create(ctx, &meta)
}

// insertRows inserts rs.Rows into the table rs.Dataset.rs.Table through
// the table's Inserter.
func insertRows(ctx context.Context, client *bigquery.Client, rs *runner.RowsSetup) error {
        table := client.Dataset(rs.Dataset).Table(rs.Table)
        return table.Inserter().Put(ctx, rs.Rows)
}

// toBQSchema converts corpus column definitions into a bigquery.Schema.
// The column type is upper-cased and used as the field type. A mode of
// REQUIRED or REPEATED (compared case-insensitively) sets the matching
// flag on the field; any other mode leaves both flags unset.
func toBQSchema(cols []runner.SchemaColumn) bigquery.Schema {
        schema := make(bigquery.Schema, len(cols))
        for i, col := range cols {
                mode := strings.ToUpper(col.Mode)
                schema[i] = &bigquery.FieldSchema{
                        Name:     col.Name,
                        Type:     bigquery.FieldType(strings.ToUpper(col.Type)),
                        Required: mode == "REQUIRED",
                        Repeated: mode == "REPEATED",
                }
        }
        return schema
}

// toBQParams converts the corpus query parameters into
// bigquery.QueryParameter values, copying each name and value and
// keeping the input order.
func toBQParams(params []differential.QueryParameterYAML) []bigquery.QueryParameter {
        converted := make([]bigquery.QueryParameter, len(params))
        for i := range params {
                converted[i] = bigquery.QueryParameter{
                        Name:  params[i].Name,
                        Value: params[i].Value,
                }
        }
        return converted
}

// readAllRows drains it and returns the result schema together with
// every row converted to the wire representation (one cell per column,
// each produced by wireCell). The first iteration error aborts the
// read and is returned with nil results.
//
// The schema is taken from the iterator only after iteration has
// finished: the BigQuery client documents that RowIterator.Schema may
// not be populated until Next has been called, so reading it up front
// can record a nil schema.
func readAllRows(it *bigquery.RowIterator) (*bqtypes.TableSchema, []bqtypes.Row, error) {
        var rows []bqtypes.Row
        for {
                var vals []bigquery.Value
                err := it.Next(&vals)
                if errors.Is(err, iterator.Done) {
                        break
                }
                if err != nil {
                        return nil, nil, err
                }
                row := bqtypes.Row{F: make([]bqtypes.Cell, len(vals))}
                for i, v := range vals {
                        row.F[i] = bqtypes.Cell{V: wireCell(v)}
                }
                rows = append(rows, row)
        }
        return wireSchema(it.Schema), rows, nil
}

// wireSchema converts a bigquery.Schema into the wire TableSchema. A
// nil schema maps to nil. Each field keeps its name and type; the mode
// is REPEATED when the field is repeated, otherwise REQUIRED when it is
// required, otherwise NULLABLE.
//
// NOTE(review): only name, type and mode are copied — confirm whether
// nested (record) sub-fields need to be carried over as well.
func wireSchema(s bigquery.Schema) *bqtypes.TableSchema {
        if s == nil {
                return nil
        }
        fields := make([]bqtypes.TableFieldSchema, 0, len(s))
        for _, field := range s {
                mode := "NULLABLE"
                switch {
                case field.Repeated:
                        mode = "REPEATED"
                case field.Required:
                        mode = "REQUIRED"
                }
                fields = append(fields, bqtypes.TableFieldSchema{
                        Name: field.Name,
                        Type: string(field.Type),
                        Mode: mode,
                })
        }
        return &bqtypes.TableSchema{Fields: fields}
}

// wireCell converts one value read from BigQuery into the cell
// representation stored in an oracle. nil stays nil; strings pass
// through; int64, float64 and bool are rendered as strings (decimal,
// %g and "true"/"false" respectively). Any other value is rendered as
// its JSON encoding.
//
// If JSON encoding fails the value is rendered with Go's default
// formatting instead, so the oracle never silently records an empty
// string for a value that was actually present.
func wireCell(v bigquery.Value) any {
        if v == nil {
                return nil
        }
        switch x := v.(type) {
        case string:
                return x
        case int64:
                return strconv.FormatInt(x, 10)
        case float64:
                return fmt.Sprintf("%g", x)
        case bool:
                return strconv.FormatBool(x)
        default:
                encoded, err := json.Marshal(v)
                if err != nil {
                        return fmt.Sprint(v)
                }
                return string(encoded)
        }
}

// sanitizeDatasetID lowercases name, replaces every rune outside
// [a-z0-9_] with an underscore, and truncates the result to 40 bytes.
// The mapped output is pure ASCII, so the byte-based truncation never
// splits a rune.
func sanitizeDatasetID(name string) string {
        const maxLen = 40
        id := strings.Map(func(r rune) rune {
                switch {
                case 'a' <= r && r <= 'z', '0' <= r && r <= '9', r == '_':
                        return r
                default:
                        return '_'
                }
        }, strings.ToLower(name))
        if len(id) > maxLen {
                return id[:maxLen]
        }
        return id
}

// differential replays committed production-BigQuery oracles against the emulator.
package main

import (
        "context"
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
        "os/signal"
        "path/filepath"
        "syscall"

        "github.com/vantaboard/bigquery-emulator/conformance/differential"
        "github.com/vantaboard/bigquery-emulator/conformance/runner"
)

// main delegates to run so that run's deferred cleanup executes before the
// process exits. A run error is printed to stderr and exits with status 2;
// otherwise a non-zero code returned by run becomes the exit status.
func main() {
        exitCode, runErr := run()
        switch {
        case runErr != nil:
                _, _ = fmt.Fprintln(os.Stderr, "differential:", runErr)
                os.Exit(2)
        case exitCode != 0:
                os.Exit(exitCode)
        }
}

// run parses the command line, wires up the optional --output-file tee and
// SIGINT/SIGTERM cancellation, and hands everything to differential.Run.
// It returns the report's exit code, or an error for flag, setup or run
// failures. A help request returns (0, nil).
func run() (int, error) {
        flags := flag.NewFlagSet("differential", flag.ContinueOnError)
        flags.SetOutput(os.Stderr)
        var (
                corpus          = flags.String("corpus", differential.DefaultCorpusDir, "corpus directory or single YAML")
                oracleDir       = flags.String("oracle-dir", differential.DefaultOracleDir, "directory of committed oracle JSON files")
                engineBinary    = flags.String("engine-binary", "./bin/emulator_main", "path to emulator_main")
                connect         = flags.String("connect", "", "HOST:PORT of a running gateway (mutually exclusive with --engine-binary)")
                profile         = flags.String("profile", "duckdb", "runtime profile")
                output          = flags.String("output", "text", "output format: text or json")
                outputFile      = flags.String("output-file", "", "tee report to this file (atomic write)")
                includeSelfTest = flags.Bool("include-selftest", false, "run _-prefixed self-test corpus files")
        )
        if parseErr := flags.Parse(os.Args[1:]); parseErr != nil {
                if errors.Is(parseErr, flag.ErrHelp) {
                        return 0, nil
                }
                return 0, parseErr
        }

        // --connect wins over the default engine binary. Only a non-default,
        // non-empty --engine-binary value is treated as a conflict.
        engine, gateway := *engineBinary, *connect
        if gateway != "" {
                if engine != "" && engine != "./bin/emulator_main" {
                        return 0, errors.New("--engine-binary and --connect are mutually exclusive")
                }
                engine = ""
        }

        reportOut, finish, err := setupOutputFile(*outputFile)
        if err != nil {
                return 0, err
        }
        if finish != nil {
                defer finish()
        }

        // Cancel the run's context on the first SIGINT/SIGTERM.
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
        signals := make(chan os.Signal, 1)
        signal.Notify(signals, os.Interrupt, syscall.SIGTERM)
        go func() {
                <-signals
                cancel()
        }()
        defer signal.Stop(signals)

        opts := differential.Options{
                CorpusDir:       *corpus,
                OracleDir:       *oracleDir,
                IncludeSelfTest: *includeSelfTest,
                Harness: runner.HarnessOptions{
                        EngineBinary:   engine,
                        ConnectAddress: gateway,
                        EngineStdout:   os.Stderr,
                        EngineStderr:   os.Stderr,
                },
                Profile: *profile,
                Output:  *output,
                Out:     reportOut,
                Err:     os.Stderr,
        }
        report, err := differential.Run(ctx, opts)
        if err != nil {
                return 0, err
        }
        return report.ExitCode(), nil
}

// setupOutputFile returns the writer the report should be rendered to.
//
// With an empty path the report goes to stdout only and the returned cleanup
// is nil. Otherwise the writer tees to stdout and to a temporary file created
// next to path; calling cleanup closes that file and renames it onto path, so
// the target only ever appears fully written.
//
// If closing or renaming the temporary file fails, cleanup reports the
// problem on stderr, removes the temporary file, and leaves any existing
// file at path untouched.
func setupOutputFile(path string) (io.Writer, func(), error) {
        if path == "" {
                return os.Stdout, nil, nil
        }
        // filepath.Dir never returns an empty string (a bare file name yields
        // "."), so the temp file always lands in the target's own directory
        // and the final rename stays on one filesystem.
        tmp, err := os.CreateTemp(filepath.Dir(path), ".differential-*.tmp")
        if err != nil {
                return nil, nil, fmt.Errorf("create --output-file tmp: %w", err)
        }
        tmpName := tmp.Name()
        cleanup := func() {
                // A failed Close can mean buffered data never reached disk, so a
                // possibly truncated report must not be published.
                if err := tmp.Close(); err != nil {
                        _, _ = fmt.Fprintln(os.Stderr, "differential: close --output-file tmp:", err)
                        _ = os.Remove(tmpName)
                        return
                }
                if err := os.Rename(tmpName, path); err != nil {
                        _, _ = fmt.Fprintln(os.Stderr, "differential: rename --output-file:", err)
                        _ = os.Remove(tmpName)
                }
        }
        return io.MultiWriter(os.Stdout, tmp), cleanup, nil
}

// genbqutils converts a bigquery-utils extractor JSON manifest into native
// conformance YAML fixtures under conformance/thirdparty-fixtures/bigquery_utils/.
package main

import (
        "encoding/json"
        "flag"
        "fmt"
        "io"
        "os"
        "path/filepath"
        "regexp"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
        "gopkg.in/yaml.v3"
)

// manifestCase is one upstream test case for a UDF, as decoded from the
// extractor's JSON manifest. Its string fields are spliced verbatim into the
// generated SQL.
type manifestCase struct {
        // Inputs are the argument expressions, joined with ", " to form the
        // call's argument list for scalar UDFs.
        Inputs         []string `json:"inputs,omitempty"`
        // ExpectedOutput is the expression compared (via TO_JSON_STRING)
        // against the function's actual result.
        ExpectedOutput string   `json:"expected_output"`
        // InputColumns are the UDAF argument columns. Entries containing
        // " NOT AGGREGATE" are passed through as literals, the rest are
        // aliased as test_input_N.
        InputColumns   []string `json:"input_columns,omitempty"`
        // InputRows is the row source the UDAF query selects from.
        InputRows      string   `json:"input_rows,omitempty"`
}

// manifestUDF describes one function from the manifest's "emitted" list. Each
// one is turned into a single conformance fixture.
type manifestUDF struct {
        // Family is the slash-separated group. It selects the output
        // subdirectory and contributes to the fixture name.
        Family            string         `json:"family"`
        // Name is the function name used in the generated query, the output
        // file name, and the project ID slug.
        Name              string         `json:"name"`
        // Kind selects the query shape: "udaf" builds an aggregate query,
        // anything else a scalar one.
        Kind              string         `json:"kind,omitempty"`
        // UpstreamSQLX and UpstreamTestCases are recorded in the provenance
        // header (the latter by base name only).
        UpstreamSQLX      string         `json:"upstream_sqlx"`
        UpstreamTestCases string         `json:"upstream_test_cases"`
        // CreateSQL is the fixture's single setup statement, with
        // surrounding whitespace trimmed.
        CreateSQL         string         `json:"create_sql"`
        // Cases are the test cases, numbered by position as case_id.
        Cases             []manifestCase `json:"cases"`
}

// manifest is the top-level JSON document read from stdin.
type manifest struct {
        // SourceSHA is the upstream revision written into every provenance
        // header ("unknown" is substituted when it is empty).
        SourceSHA string        `json:"source_sha"`
        // Emitted lists the functions to generate fixtures for.
        Emitted   []manifestUDF `json:"emitted"`
        // Skipped is decoded but not consumed by the code visible in this
        // file.
        Skipped   []struct {
                Family string `json:"family"`
                Name   string `json:"name"`
                Reason string `json:"reason"`
        } `json:"skipped"`
}

// nonAlnum matches each run of one or more characters outside [a-z0-9].
// Callers lowercase their input first and replace every run with a single
// separator character.
var nonAlnum = regexp.MustCompile(`[^a-z0-9]+`)

// main reads the extractor manifest (JSON) from stdin, wipes the output
// directory, and writes one fixture per emitted UDF. A relative --out-dir is
// resolved against the repository root. Any failure is fatal.
func main() {
        outFlag := flag.String(
                "out-dir",
                "conformance/thirdparty-fixtures/bigquery_utils/known_failing",
                "output root (wiped each run)",
        )
        flag.Parse()

        raw, readErr := io.ReadAll(os.Stdin)
        if readErr != nil {
                fatal("read stdin: %v", readErr)
        }
        var parsed manifest
        if parseErr := json.Unmarshal(raw, &parsed); parseErr != nil {
                fatal("parse manifest: %v", parseErr)
        }

        root, rootErr := repoRoot()
        if rootErr != nil {
                fatal("%v", rootErr)
        }
        target := *outFlag
        if !filepath.IsAbs(target) {
                target = filepath.Join(root, target)
        }

        // The output root is regenerated from scratch on every run.
        if wipeErr := wipeDir(target); wipeErr != nil {
                fatal("wipe %s: %v", target, wipeErr)
        }

        for i := range parsed.Emitted {
                udf := parsed.Emitted[i]
                if writeErr := writeFixture(root, target, parsed.SourceSHA, udf); writeErr != nil {
                        fatal("write %s/%s: %v", udf.Family, udf.Name, writeErr)
                }
        }

        fmt.Fprintf(os.Stderr, "genbqutils: wrote %d fixtures to %s\n", len(parsed.Emitted), target)
}

// repoRoot walks upward from the working directory and returns the first
// directory that contains a go.mod file. It fails if the filesystem root is
// reached without finding one.
func repoRoot() (string, error) {
        start, err := os.Getwd()
        if err != nil {
                return "", err
        }
        for cur := start; ; {
                if _, statErr := os.Stat(filepath.Join(cur, "go.mod")); statErr == nil {
                        return cur, nil
                }
                up := filepath.Dir(cur)
                if up == cur {
                        // filepath.Dir is a fixed point at the root: nothing left to try.
                        return "", fmt.Errorf("could not find repo root from %s", start)
                }
                cur = up
        }
}

// wipeDir removes dir with everything below it and recreates it empty with
// mode 0o750. A directory that does not exist yet is simply created.
func wipeDir(dir string) error {
        err := os.RemoveAll(dir)
        if err == nil {
                err = os.MkdirAll(dir, 0o750)
        }
        return err
}

func fixtureName(family, name string) string {
        parts := []string{"bqutils"}
        for seg := range strings.SplitSeq(family, "/") {
                if seg != "" {
                        parts = append(parts, seg)
                }
        }
        parts = append(parts, name)
        raw := strings.Join(parts, "_")
        return nonAlnum.ReplaceAllString(strings.ToLower(raw), "_")
}

// projectID derives the fixture's project ID from the UDF name: the name is
// lowercased, runs of characters outside [a-z0-9] become single hyphens, and
// leading/trailing hyphens are stripped. An empty slug falls back to "udf".
func projectID(name string) string {
        const prefix = "proj-bqutils-"
        slug := strings.Trim(nonAlnum.ReplaceAllString(strings.ToLower(name), "-"), "-")
        if slug != "" {
                return prefix + slug
        }
        return prefix + "udf"
}

// buildQuery renders the fixture's query for udf. UDAFs are delegated to
// buildUdafQuery. For scalar UDFs it emits a `cases` CTE with one SELECT per
// test case (joined by UNION ALL) that JSON-encodes the actual and expected
// values, followed by a final SELECT yielding (case_id, matches) in case
// order.
func buildQuery(udf manifestUDF) string {
        if udf.Kind == "udaf" {
                return buildUdafQuery(udf)
        }
        var sql strings.Builder
        sql.WriteString("WITH cases AS (\n")
        for caseID := range udf.Cases {
                if caseID != 0 {
                        sql.WriteString("  UNION ALL\n")
                }
                tc := udf.Cases[caseID]
                callArgs := strings.Join(tc.Inputs, ", ")
                fmt.Fprintf(&sql, "  SELECT %d AS case_id, TO_JSON_STRING(%s(%s)) AS actual, TO_JSON_STRING(%s) AS expected\n",
                        caseID, udf.Name, callArgs, tc.ExpectedOutput)
        }
        sql.WriteString(")\n")
        // A plain `actual = expected` is NULL when either side is NULL, so the
        // IFNULL fallback counts two NULL JSON strings as a match.
        sql.WriteString(
                "SELECT case_id, IFNULL(actual = expected, actual IS NULL AND expected IS NULL) AS matches FROM cases ORDER BY case_id\n",
        )
        return sql.String()
}

// buildUdafQuery renders the fixture's query for an aggregate UDF. Each test
// case contributes one SELECT to the `cases` CTE that calls the function over
// the case's input rows.
//
// Input columns containing " NOT AGGREGATE" are passed to the function as the
// literal text before that marker. Every other column is projected from the
// input rows under the alias test_input_N and passed by that alias.
func buildUdafQuery(udf manifestUDF) string {
        var sql strings.Builder
        sql.WriteString("WITH cases AS (\n")
        for caseID := range udf.Cases {
                if caseID != 0 {
                        sql.WriteString("  UNION ALL\n")
                }
                tc := udf.Cases[caseID]

                var projected, callArgs []string
                for _, col := range tc.InputColumns {
                        if literal, _, found := strings.Cut(col, " NOT AGGREGATE"); found {
                                callArgs = append(callArgs, strings.TrimSpace(literal))
                                continue
                        }
                        // Aliases are numbered by how many aggregate columns came before.
                        alias := fmt.Sprintf("test_input_%d", len(projected))
                        projected = append(projected, fmt.Sprintf("%s AS %s", col, alias))
                        callArgs = append(callArgs, alias)
                }

                source := tc.InputRows
                if len(projected) > 0 {
                        source = fmt.Sprintf("SELECT %s FROM (%s)", strings.Join(projected, ", "), tc.InputRows)
                }
                fmt.Fprintf(&sql,
                        "  SELECT %d AS case_id, TO_JSON_STRING(%s(%s)) AS actual, TO_JSON_STRING(%s) AS expected\n  FROM (%s)\n",
                        caseID, udf.Name, strings.Join(callArgs, ", "), tc.ExpectedOutput, source)
        }
        sql.WriteString(")\n")
        sql.WriteString(
                "SELECT case_id, IFNULL(actual = expected, actual IS NULL AND expected IS NULL) AS matches FROM cases ORDER BY case_id\n",
        )
        return sql.String()
}

// kindLabel returns the human-readable label for a manifest kind: "UDAF" for
// exactly "udaf", and "UDF" for everything else (including the empty string).
func kindLabel(kind string) string {
        switch kind {
        case "udaf":
                return "UDAF"
        default:
                return "UDF"
        }
}

// buildFixture assembles the conformance fixture for udf: the function's
// CREATE statement as the only setup step, the generated comparison query,
// and an ordered expectation of one {case_id, matches: true} row per test
// case. case_id is expected as a decimal string.
func buildFixture(udf manifestUDF) runner.Fixture {
        caseCount := len(udf.Cases)
        expectedRows := make([]map[string]any, 0, caseCount)
        for id := 0; id < caseCount; id++ {
                expectedRows = append(expectedRows, map[string]any{
                        "case_id": strconv.Itoa(id),
                        "matches": true,
                })
        }

        description := fmt.Sprintf(
                "bigquery-utils %s %s %s (%d cases)",
                udf.Family,
                kindLabel(udf.Kind),
                udf.Name,
                caseCount,
        )
        setup := []runner.SetupStep{
                {SQL: strings.TrimSpace(udf.CreateSQL)},
        }

        return runner.Fixture{
                Name:        fixtureName(udf.Family, udf.Name),
                Description: description,
                Profiles:    []string{runner.ProfileDuckDB},
                ProjectID:   projectID(udf.Name),
                Setup:       setup,
                Query:       buildQuery(udf),
                Expected: runner.Expectation{
                        Match: runner.MatchOrdered,
                        Rows:  expectedRows,
                },
        }
}

// provenanceHeader returns the three-line YAML comment block placed at the
// top of every generated fixture: upstream repository and revision, the
// source .sqlx path plus the test-cases file's base name, and the license /
// do-not-edit notice. An empty sha is rendered as "unknown".
func provenanceHeader(sha string, udf manifestUDF) string {
        const layout = "# Source: GoogleCloudPlatform/bigquery-utils @ %s\n" +
                "#   %s (+ %s)\n" +
                "# License: Apache-2.0. Generated by scripts/sync_bigquery_utils_udfs.sh; do not edit by hand.\n"

        revision := sha
        if revision == "" {
                revision = "unknown"
        }
        testCasesFile := filepath.Base(udf.UpstreamTestCases)
        return fmt.Sprintf(layout, revision, udf.UpstreamSQLX, testCasesFile)
}

// marshalFixture encodes f as YAML with two-space indentation and returns the
// document bytes. An error from encoding or from closing the encoder is
// returned with a nil slice.
func marshalFixture(f runner.Fixture) ([]byte, error) {
        var doc strings.Builder
        encoder := yaml.NewEncoder(&doc)
        encoder.SetIndent(2)

        err := encoder.Encode(&f)
        if err == nil {
                err = encoder.Close()
        }
        if err != nil {
                return nil, err
        }
        return []byte(doc.String()), nil
}

// writeFixture generates the fixture for udf and writes it to
// <outRoot>/<family>/<name>.yaml with mode 0o600, prefixed by the provenance
// header.
//
// Before the final write, the exact content is written to a scratch file in
// repoRoot and loaded through runner.Load, so a fixture the runner cannot
// parse is rejected instead of being written. The scratch file is always
// removed.
func writeFixture(repoRoot, outRoot, sha string, udf manifestUDF) error {
        body, err := marshalFixture(buildFixture(udf))
        if err != nil {
                return err
        }

        outPath := filepath.Join(outRoot, udf.Family, udf.Name+".yaml")
        if err := os.MkdirAll(filepath.Dir(outPath), 0o750); err != nil {
                return err
        }

        content := append([]byte(provenanceHeader(sha, udf)), body...)

        // Round-trip through the runner's own loader to catch schema drift.
        scratch, err := os.CreateTemp(repoRoot, ".tmp-genbqutils-*.yaml")
        if err != nil {
                return err
        }
        scratchPath := scratch.Name()
        defer func() {
                _ = os.Remove(scratchPath)
        }()

        // Always close the scratch file; a write error takes precedence over
        // a close error.
        _, writeErr := scratch.Write(content)
        closeErr := scratch.Close()
        if writeErr != nil {
                return writeErr
        }
        if closeErr != nil {
                return closeErr
        }

        if _, loadErr := runner.Load(scratchPath); loadErr != nil {
                return fmt.Errorf("runner.Load: %w", loadErr)
        }
        return os.WriteFile(outPath, content, 0o600)
}

// fatal prints a "genbqutils: "-prefixed, newline-terminated message built
// from format and args to stderr, then exits with status 1. It never returns.
func fatal(format string, args ...any) {
        prefixed := "genbqutils: " + format + "\n"
        fmt.Fprintf(os.Stderr, prefixed, args...)
        os.Exit(1)
}

// googlesql-corpus drives the vendored GoogleSQL compliance .test subset
// through jobs.query and diffs results with the fixture lane's typed-cell
// comparator.
package main

import (
        "context"
        "encoding/json"
        "errors"
        "flag"
        "fmt"
        "os"
        "os/signal"
        "syscall"

        "github.com/vantaboard/bigquery-emulator/conformance/googlesqlcorpus"
        "github.com/vantaboard/bigquery-emulator/conformance/runner"
)

// main delegates to run so that run's deferred cleanup executes before the
// process exits. A run error is printed to stderr and exits with status 2;
// otherwise a non-zero code returned by run becomes the exit status.
func main() {
        exitCode, runErr := run()
        switch {
        case runErr != nil:
                _, _ = fmt.Fprintln(os.Stderr, "googlesql-corpus:", runErr)
                os.Exit(2)
        case exitCode != 0:
                os.Exit(exitCode)
        }
}

// cliConfig holds the parsed command-line flags of the googlesql-corpus
// binary. See parseCLI for the flag names and defaults.
type cliConfig struct {
        // corpusDir is the directory of vendored .test files (--corpus).
        corpusDir    string
        // manifestPath is the pinned-passing manifest to load and, in triage
        // mode, to rewrite (--manifest).
        manifestPath string
        // engineBinary is the path to emulator_main (--engine-binary).
        engineBinary string
        // profile is the conformance profile to run under (--profile).
        profile      string
        // gatePinned restricts the run to the manifest's pinned set
        // (--gate-pinned). It is ignored when triage is set.
        gatePinned   bool
        // triage runs all runnable cases and writes the resulting buckets
        // back to the manifest (--triage).
        triage       bool
        // output is the report format, "text" or "json" (--output).
        output       string
        // outputFile is an optional path that receives the JSON report
        // (--output-file).
        outputFile   string
}

// run parses the command line, loads the pinned manifest, executes the corpus
// with SIGINT/SIGTERM cancellation, and writes the report. In triage mode the
// manifest is rebuilt from the results and saved before the report is
// written.
//
// It returns the report's exit code, or an error for flag, manifest, run or
// write failures. A help request returns (0, nil).
func run() (int, error) {
        cfg, err := parseCLI(os.Args[1:])
        if err != nil {
                if errors.Is(err, flag.ErrHelp) {
                        return 0, nil
                }
                return 0, err
        }

        // A missing manifest is not an error: the run starts from an empty one.
        // errors.Is is used instead of os.IsNotExist so the check still works
        // if LoadManifest wraps the underlying error; os.IsNotExist does not
        // unwrap.
        manifest, err := googlesqlcorpus.LoadManifest(cfg.manifestPath)
        if err != nil && !errors.Is(err, os.ErrNotExist) {
                return 0, err
        }
        if manifest == nil {
                manifest = &googlesqlcorpus.Manifest{}
        }

        // Cancel the run's context on the first SIGINT/SIGTERM.
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
        sigCh := make(chan os.Signal, 1)
        signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
        go func() {
                <-sigCh
                cancel()
        }()
        defer signal.Stop(sigCh)

        report, err := googlesqlcorpus.Run(ctx, googlesqlcorpus.Options{
                CorpusDir: cfg.corpusDir,
                Manifest:  manifest,
                // Triage mode overrides --gate-pinned.
                GatePinned: cfg.gatePinned && !cfg.triage,
                TriageMode: cfg.triage,
                Harness: runner.HarnessOptions{
                        EngineBinary: cfg.engineBinary,
                        EngineStdout: os.Stderr,
                        EngineStderr: os.Stderr,
                },
                Profile: cfg.profile,
                Out:     os.Stdout,
                Err:     os.Stderr,
        })
        if err != nil {
                return 0, err
        }

        if cfg.triage {
                updateManifestFromTriage(manifest, report)
                if err := googlesqlcorpus.SaveManifest(cfg.manifestPath, manifest); err != nil {
                        return 0, err
                }
        }

        if err := writeReport(report, cfg.output, cfg.outputFile); err != nil {
                return 0, err
        }

        return report.ExitCode(), nil
}

// parseCLI parses args into a cliConfig. On a parse failure (including
// flag.ErrHelp for -h/-help) it returns the zero cliConfig and the error.
func parseCLI(args []string) (cliConfig, error) {
        var parsed cliConfig
        flags := flag.NewFlagSet("googlesql-corpus", flag.ContinueOnError)

        flags.StringVar(&parsed.corpusDir, "corpus", "conformance/googlesql-corpus/corpus", "directory of vendored .test files")
        flags.StringVar(
                &parsed.manifestPath,
                "manifest",
                "conformance/googlesql-corpus/manifest/pinned.json",
                "pinned-passing manifest",
        )
        flags.StringVar(&parsed.engineBinary, "engine-binary", "./bin/emulator_main", "path to emulator_main")
        flags.StringVar(&parsed.profile, "profile", "duckdb", "conformance profile")
        flags.BoolVar(&parsed.gatePinned, "gate-pinned", true, "only run cases listed in manifest pinned set")
        flags.BoolVar(&parsed.triage, "triage", false, "run all runnable cases and write triage buckets to manifest")
        flags.StringVar(&parsed.output, "output", "text", "text or json")
        flags.StringVar(&parsed.outputFile, "output-file", "", "optional report path")

        if err := flags.Parse(args); err != nil {
                return cliConfig{}, err
        }
        return parsed, nil
}

// writeReport emits the report as indented JSON. It is written to outputFile
// (with a trailing newline, mode 0o600) when a path is given, and printed to
// stdout when output is "json". Both may happen in one call. If neither
// applies the function does nothing.
func writeReport(report *googlesqlcorpus.Report, output, outputFile string) error {
        toStdout := output == "json"
        toFile := outputFile != ""
        if !toStdout && !toFile {
                return nil
        }

        encoded, err := json.MarshalIndent(report, "", "  ")
        if err != nil {
                return err
        }
        if toFile {
                fileBytes := append(encoded, '\n')
                //nolint:gosec // report path is CLI-controlled
                if writeErr := os.WriteFile(outputFile, fileBytes, 0o600); writeErr != nil {
                        return writeErr
                }
        }
        if toStdout {
                fmt.Println(string(encoded))
        }
        return nil
}

// updateManifestFromTriage folds a triage run into m. The pinned list is
// rebuilt from scratch with the IDs of passing results, and every result's
// triage entry is overwritten: passing results get the pinned-pass bucket,
// all others keep the bucket and message the run reported. Triage entries
// for IDs absent from the report are left as they were.
func updateManifestFromTriage(m *googlesqlcorpus.Manifest, report *googlesqlcorpus.Report) {
        if m.Triage == nil {
                m.Triage = make(map[string]googlesqlcorpus.TriageEntry)
        }
        m.Pinned = nil
        for _, res := range report.Results {
                if res.Status == string(runner.StatusPass) {
                        m.Pinned = append(m.Pinned, res.ID)
                        m.Triage[res.ID] = googlesqlcorpus.TriageEntry{Bucket: googlesqlcorpus.BucketPinnedPass}
                        continue
                }
                // Skipped and failing results are recorded the same way.
                m.Triage[res.ID] = googlesqlcorpus.TriageEntry{Bucket: res.Bucket, Message: res.Message}
        }
}

// Binary routing-matrix walks every fixture under
// `conformance/fixtures/` (excluding the leading-underscore
// quarantine directories) and emits a Markdown table of `Shape |
// Route | Strict` rows. The output is a non-blocking CI artifact
// surfaced by `task conformance:routing-matrix` (see
// `docs/ENGINE_POLICY.md`) and used by
// reviewers to spot when a fixture family's actual route does not
// match its directory's aspirational label.
//
// The walker reads YAML only -- it does NOT talk to a live
// emulator. The fixture's `expected.route` is the source of
// truth for the matrix column; the runner enforces that the
// engine actually emits that route at execution time.
//
// Determinism: rows are sorted by fixture path so two consecutive
// invocations produce byte-identical output (pinned by
// `routing_matrix_test.go::TestRoutingMatrixIsReproducible`).
package main

import (
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
        "path/filepath"
        "slices"
        "sort"
        "strings"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
)

// main runs the routing-matrix command against the process's arguments and
// standard streams, prints any error to stderr, and always exits with the
// code run returned.
func main() {
        exitCode, runErr := run(os.Args[1:], os.Stdout, os.Stderr)
        if runErr != nil {
                fmt.Fprintln(os.Stderr, "routing-matrix:", runErr)
        }
        os.Exit(exitCode)
}

// run implements the routing-matrix command. It builds the Markdown matrix
// for the fixtures under --fixtures, writes it to stdout, and, when
// --output-file is set, also writes the same bytes to that file.
//
// Flag diagnostics and usage go to stderr. The returned code is 0 on success
// or a help request, 2 for a flag-parse error, and 1 for any failure while
// building or writing the matrix.
func run(args []string, stdout, stderr io.Writer) (int, error) {
        fs := flag.NewFlagSet("routing-matrix", flag.ContinueOnError)
        fs.SetOutput(stderr)
        fixturesDir := fs.String("fixtures", "conformance/fixtures",
                "path to the fixtures directory (or single fixture file)")
        outputFile := fs.String("output-file", "",
                "if set, write the Markdown table to this path (truncate). "+
                        "Stdout still receives the same bytes for piping.")
        if err := fs.Parse(args); err != nil {
                // -h/-help already printed the usage; asking for help is not a
                // failure, so it exits 0 without an error message.
                if errors.Is(err, flag.ErrHelp) {
                        return 0, nil
                }
                return 2, err
        }
        table, err := buildMatrix(*fixturesDir)
        if err != nil {
                return 1, err
        }
        if _, err := io.WriteString(stdout, table); err != nil {
                return 1, err
        }
        if *outputFile != "" {
                // 0o600 keeps gosec G306 quiet -- the matrix is a
                // reproducible function of the public fixture YAMLs and
                // has no secret content, but the linter wants a strict
                // mode and the CI artifact uploader (or local
                // inspection) does not rely on group-readable
                // permissions on the on-disk copy.
                if err := os.WriteFile(*outputFile, []byte(table), 0o600); err != nil {
                        return 1, fmt.Errorf("write %s: %w", *outputFile, err)
                }
        }
        return 0, nil
}

// matrixRow is the rendered shape of a single fixture in the
// matrix output. Kept private to the rendering helpers below.
type matrixRow struct {
        // shape is the fixture path relative to the fixtures directory,
        // slash-separated and without its file extension.
        shape  string
        // route is the fixture's expected route, or its bracketed route
        // allowlist, or "(unassigned)" when neither is set.
        route  string
        // strict is rendered in the Strict column; it comes from the
        // fixture's Expected.RouteStrictDefault().
        strict bool
}

// buildMatrix loads the fixtures under `dir` via runner.LoadDir
// (which already skips the `_*/` quarantine directories) and
// returns the rendered Markdown document: header, per-route
// totals, and the per-fixture table sorted by fixture path. The
// Strict column lets a reviewer spot `route_strict: false` rows,
// i.e. documentation-only entries (typically error-path fixtures
// whose engine response never carries a trailer).
func buildMatrix(dir string) (string, error) {
        loaded, err := runner.LoadDir(dir)
        if err != nil {
                return "", err
        }
        return renderMatrix(collectMatrixRows(loaded, dir)), nil
}

// collectMatrixRows turns loaded fixtures into matrix rows sorted
// by shape. All path normalization happens here (relative to
// `dir`, extension stripped, forward slashes) so the renderer only
// deals with plain strings. The route column is the fixture's
// expected route, else its bracketed allowlist, else
// "(unassigned)".
func collectMatrixRows(fixtures []*runner.Fixture, dir string) []matrixRow {
        rows := make([]matrixRow, 0, len(fixtures))
        for _, fx := range fixtures {
                relPath, relErr := filepath.Rel(dir, fx.Path)
                if relErr != nil {
                        // Fall back to the path as loaded.
                        relPath = fx.Path
                }
                withoutExt := strings.TrimSuffix(relPath, filepath.Ext(relPath))

                var route string
                switch {
                case fx.Expected.Route != "":
                        route = fx.Expected.Route
                case len(fx.Expected.RouteAllowlist) > 0:
                        route = "[" + strings.Join(fx.Expected.RouteAllowlist, ", ") + "]"
                default:
                        route = "(unassigned)"
                }

                rows = append(rows, matrixRow{
                        shape:  filepath.ToSlash(withoutExt),
                        route:  route,
                        strict: fx.Expected.RouteStrictDefault(),
                })
        }
        sort.SliceStable(rows, func(a, b int) bool {
                return rows[a].shape < rows[b].shape
        })
        return rows
}

// renderMatrix produces the Markdown document for rows: the fixed
// header, the per-route totals, then the per-fixture table. It is
// separate from buildMatrix to stay under the funlen cap and so
// tests can diff the rendering on its own.
func renderMatrix(rows []matrixRow) string {
        perRoute := make(map[string]int)
        for i := range rows {
                perRoute[rows[i].route]++
        }

        var doc strings.Builder
        doc.WriteString(matrixHeader)
        writeSummary(&doc, perRoute, len(rows))
        writePerFixture(&doc, rows)
        return doc.String()
}

// matrixHeader is the fixed Markdown preamble written before the tables:
// the document title, a note on what each row pins, and an explanation of
// the Strict column. Backtick-quoted terms are spliced in as ordinary string
// literals because a raw string literal cannot contain a backtick.
const matrixHeader = `# Conformance routing matrix

Generated by ` + "`task conformance:routing-matrix`" + `. Each row pins the route the coordinator's ` +
        "`RouteClassifier`" + ` chooses for the fixture's ` + "`query:`" + ` step (the ` + "`setup:`" +
        ` steps run on their own routes).

Strict=` + "`false`" + ` flags documentation-only rows: the engine returns before ` +
        "`EmitTrailers`" + ` fires (typically error-path fixtures), so the runner skips the assertion at execution time. The route value stays in the matrix as planning-time documentation.

`

// writeSummary appends the "Per-route totals" table to b. counts maps a
// route label to the number of fixtures carrying it, and total is the
// overall fixture count shown in the final row.
//
// Canonical routes come first, in runner.KnownRouteNames() order, and only
// when their count is non-zero. Every other label follows in sorted order.
// The sort matters: Go randomizes map iteration order, and the matrix must
// be byte-identical across runs.
func writeSummary(b *strings.Builder, counts map[string]int, total int) {
        b.WriteString("## Per-route totals\n\n| Route | Count |\n|---|---|\n")
        for _, name := range runner.KnownRouteNames() {
                if n := counts[name]; n > 0 {
                        fmt.Fprintf(b, "| `%s` | %d |\n", name, n)
                }
        }
        // Print any non-canonical buckets (allowlist-rendered rows,
        // `(unassigned)`) AFTER the canonical block so a stray label
        // is visible to a reviewer.
        extras := make([]string, 0, len(counts))
        for k := range counts {
                if !isCanonicalRoute(k) {
                        extras = append(extras, k)
                }
        }
        sort.Strings(extras)
        for _, k := range extras {
                fmt.Fprintf(b, "| `%s` | %d |\n", k, counts[k])
        }
        fmt.Fprintf(b, "| **total** | %d |\n", total)
}

// writePerFixture appends the "Per-fixture" table to b, one line per row in
// the order given, with the strict flag spelled out as true/false.
func writePerFixture(b *strings.Builder, rows []matrixRow) {
        b.WriteString("\n## Per-fixture\n\n| Shape | Route | Strict |\n|---|---|---|\n")
        for i := range rows {
                row := rows[i]
                strictText := "false"
                if row.strict {
                        strictText = "true"
                }
                fmt.Fprintf(b, "| `%s` | `%s` | %s |\n", row.shape, row.route, strictText)
        }
}

// isCanonicalRoute reports whether s is one of the names returned by
// runner.KnownRouteNames().
func isCanonicalRoute(s string) bool {
        known := runner.KnownRouteNames()
        return slices.Index(known, s) >= 0
}

// Binary runner is the conformance harness's CLI. It loads YAML
// fixtures, iterates the engine x storage profile matrix, and emits
// PASS / FAIL records (or a JSON report). See `conformance/README.md`
// for the fixture schema and worked examples; this file is just flag
// parsing and exit-code wiring.
package main

import (
        "context"
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
        "os/signal"
        "path/filepath"
        "strings"
        "syscall"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
)

// stringSliceFlag implements flag.Value for a repeatable string
// flag: each `--profile duckdb` occurrence appends to the slice
// instead of replacing the previous value.
type stringSliceFlag []string

// String renders the accumulated values as a comma-separated list.
func (s *stringSliceFlag) String() string {
        return strings.Join([]string(*s), ",")
}

// Set appends v to the slice. Empty values are ignored, and Set
// never returns an error.
func (s *stringSliceFlag) Set(v string) error {
        if v != "" {
                *s = append(*s, v)
        }
        return nil
}

// main delegates to run so that run's deferred cleanup executes before the
// process exits. A run error is printed to stderr and exits with status 2;
// otherwise a non-zero code returned by run becomes the exit status.
func main() {
        exitCode, runErr := run()
        switch {
        case runErr != nil:
                _, _ = fmt.Fprintln(os.Stderr, "runner:", runErr)
                os.Exit(2)
        case exitCode != 0:
                os.Exit(exitCode)
        }
}

// runnerConfig is the parsed view of the CLI flags that `run` hands
// off to the conformance runner. Pulled out of run() so the flag-
// parsing block (and the engine-binary / --connect mutual-exclusion
// rule) can live in its own helper without smuggling state through
// closures.
type runnerConfig struct {
        // Fixtures is the directory or file containing fixture YAML
        // (--fixtures).
        Fixtures        string
        // EngineBinary is the path to emulator_main (--engine-binary).
        EngineBinary    string
        // Connect is the HOST:PORT of an already-running engine to dial
        // instead of spawning emulator_main (--connect).
        Connect         string
        // UpdateBaselines overwrites each fixture's expected: block with the
        // captured response (--update-baselines).
        UpdateBaselines bool
        // Output is the report format, "text" or "json" (--output).
        Output          string
        // OutputFile, when non-empty, also receives the rendered report
        // (--output-file).
        OutputFile      string
        // Profiles restricts the matrix to the named profiles (repeatable
        // --profile).
        Profiles        []string
        HelpExit        bool // user passed --help; main should exit 0.
}

// run is the binary's real entry point: it parses flags, sets up
// the optional report file, installs signal handling, and invokes
// runner.Run. It returns the exit code for main to pass to os.Exit
// (returning instead of exiting lets this function's defers fire)
// together with any error main should print. Help requests return
// (0, nil).
func run() (int, error) {
        cfg, err := parseFlags(os.Args[1:])
        switch {
        case errors.Is(err, flag.ErrHelp):
                return 0, nil
        case err != nil:
                return 0, err
        case cfg.HelpExit:
                return 0, nil
        }

        reportOut, finish, err := setupOutputFile(cfg.OutputFile)
        if err != nil {
                return 0, err
        }
        if finish != nil {
                defer finish()
        }

        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()

        // On SIGINT/SIGTERM the runner's context is cancelled so the
        // harness can SIGINT every emulator subprocess it spawned.
        // The runner still returns its in-progress Report, so the
        // caller sees what PASSed before the cancel.
        signals := make(chan os.Signal, 1)
        signal.Notify(signals, os.Interrupt, syscall.SIGTERM)
        go func() {
                <-signals
                cancel()
        }()
        defer signal.Stop(signals)

        opts := runner.Options{
                FixturesPath: cfg.Fixtures,
                Harness: runner.HarnessOptions{
                        EngineBinary:   cfg.EngineBinary,
                        ConnectAddress: cfg.Connect,
                        EngineStdout:   os.Stderr,
                        EngineStderr:   os.Stderr,
                },
                Profiles:        cfg.Profiles,
                UpdateBaselines: cfg.UpdateBaselines,
                Output:          cfg.Output,
                Out:             reportOut,
                Err:             os.Stderr,
        }
        report, err := runner.Run(ctx, opts)
        if err != nil {
                return 0, err
        }
        return report.ExitCode(), nil
}

// parseFlags builds the runner's flag set, parses args, and folds the
// result into a runnerConfig.
//
// An explicit --help prints the usage banner and yields a config with
// only HelpExit set. Errors from the flag package are returned
// unchanged; that includes flag.ErrHelp, which the flag package
// reports for a help flag that is not declared (such as -h here).
//
// --engine-binary and --connect are mutually exclusive. Because
// --engine-binary carries a non-empty default, the pair is rejected
// only when the binary path differs from that default; when both are
// non-empty and the binary is still the default, EngineBinary is
// cleared so only Connect remains set.
func parseFlags(args []string) (runnerConfig, error) {
        const defaultEngineBinary = "./bin/emulator_main"

        var (
                cfg      runnerConfig
                profiles stringSliceFlag
                wantHelp bool
        )

        fs := flag.NewFlagSet("runner", flag.ContinueOnError)
        fs.SetOutput(os.Stderr)
        fs.Usage = func() { writeUsage(fs) }

        fs.StringVar(&cfg.Fixtures, "fixtures", "conformance/fixtures", "directory or file containing fixture YAML")
        fs.StringVar(
                &cfg.EngineBinary,
                "engine-binary",
                defaultEngineBinary,
                "path to emulator_main; mutually exclusive with --connect",
        )
        fs.StringVar(
                &cfg.Connect,
                "connect",
                "",
                "HOST:PORT of an already-running engine to dial instead of spawning emulator_main",
        )
        fs.BoolVar(
                &cfg.UpdateBaselines,
                "update-baselines",
                false,
                "overwrite each fixture's expected: block with the captured response (bootstrap mode)",
        )
        fs.StringVar(&cfg.Output, "output", "text", "output format: text or json")
        fs.StringVar(
                &cfg.OutputFile,
                "output-file",
                "",
                "if non-empty, write the rendered report to this file (atomic write) in addition to stdout",
        )
        fs.BoolVar(&wantHelp, "help", false, "print usage and exit")
        fs.Var(&profiles, "profile", "restrict the matrix to one profile (repeatable). Default: all known profiles")

        if err := fs.Parse(args); err != nil {
                return runnerConfig{}, err
        }
        if wantHelp {
                fs.Usage()
                return runnerConfig{HelpExit: true}, nil
        }

        if cfg.Connect != "" && cfg.EngineBinary != "" {
                // A non-default binary path can only have come from the
                // user, so it genuinely conflicts with --connect. The
                // untouched default is simply dropped in favor of dialing.
                if cfg.EngineBinary != defaultEngineBinary {
                        return runnerConfig{}, errors.New("--engine-binary and --connect are mutually exclusive")
                }
                cfg.EngineBinary = ""
        }

        cfg.Profiles = []string(profiles)
        return cfg, nil
}

// writeUsage prints the runner's --help text to the flag set's output:
// a prose banner, the flag defaults, then the profile and exit-code
// reference. It lives outside parseFlags so that function stays under
// the funlen limit; the banner prose accounts for most of the lines.
func writeUsage(fs *flag.FlagSet) {
        const banner = `Usage: runner [flags]

Run the BigQuery emulator conformance fixtures and diff against
expected rows or errors. By default the runner spawns its own
emulator_main subprocess per fixture x profile; --connect HOST:PORT
reaches an already-running gateway (used by CI).

Flags:`

        const trailer = `
Profiles:
  duckdb   duckdb engine + duckdb storage  (only profile today)

Exit codes:
  0   every fixture x profile PASSed
  1   at least one fixture x profile FAILed
  2   runner-internal error (bad YAML, can't start engine, etc)

See conformance/README.md for the fixture schema and JSON output
shape.`

        w := fs.Output()
        _, _ = fmt.Fprintln(w, banner)
        fs.PrintDefaults()
        _, _ = fmt.Fprintln(w, trailer)
}

// setupOutputFile honors --output-file.
//
// When path is empty it returns os.Stdout and a nil cleanup. Otherwise
// it creates a temp file next to path and returns a writer that tees
// everything into both stdout and that temp file, plus a cleanup
// closure. The caller must run cleanup (normally via defer): it closes
// the temp file and renames it onto path, so the destination only ever
// appears fully written.
//
// The rename happens regardless of whether the runner returned an
// error or a non-zero exit code (fixture mismatch): the artifact is
// still the most useful diagnostic the workflow has on hand. Only a
// CreateTemp failure (out of disk, perm denied) short-circuits before
// any data lands; that failure is returned as the error result.
//
// cleanup cannot return an error, so close and rename failures are
// reported on stderr. A failed rename also removes the temp file.
func setupOutputFile(path string) (io.Writer, func(), error) {
        if path == "" {
                return os.Stdout, nil, nil
        }
        // filepath.Dir never returns "" (a bare file name yields "."),
        // so its result can be handed to CreateTemp directly.
        tmp, err := os.CreateTemp(filepath.Dir(path), ".conformance-runner-*.tmp")
        if err != nil {
                return nil, nil, fmt.Errorf("create --output-file tmp: %w", err)
        }
        tmpName := tmp.Name()
        cleanup := func() {
                // A failed close can mean buffered data never reached disk.
                // Surface it, but still publish the file: a partial report
                // beats no report.
                if err := tmp.Close(); err != nil {
                        _, _ = fmt.Fprintln(os.Stderr, "runner: close --output-file tmp:", err)
                }
                if err := os.Rename(tmpName, path); err != nil {
                        _, _ = fmt.Fprintln(os.Stderr, "runner: rename --output-file:", err)
                        _ = os.Remove(tmpName)
                }
        }
        return io.MultiWriter(os.Stdout, tmp), cleanup, nil
}

// session runs stateful multi-step conformance sessions against one long-lived
// emulator process per session.
package main

import (
        "context"
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
        "os/signal"
        "path/filepath"
        "strings"
        "syscall"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
)

// sessionConfig holds the parsed command-line options of the session
// CLI. parseFlags fills it and run forwards the values to
// runner.RunSessions.
type sessionConfig struct {
        // Sessions is the --sessions value: a session directory or a
        // single YAML file. Defaults to runner.DefaultSessionsDir.
        Sessions        string
        // EngineBinary is the --engine-binary path to emulator_main.
        // parseFlags clears it whenever Connect is set.
        EngineBinary    string
        // Connect is the --connect HOST:PORT of a running gateway.
        Connect         string
        // Profile is the --profile value. When it is blank, run passes
        // no profile list to the runner.
        Profile         string
        // Output is the --output format: text or json.
        Output          string
        // OutputFile is the --output-file path; when non-empty the
        // report is also teed into that file.
        OutputFile      string
        // IncludeSelfTest is the --include-selftest switch for running
        // _-prefixed self-test session files.
        IncludeSelfTest bool
}

// main runs the session CLI and maps its outcome onto the process exit
// status: 2 when run reports an error (printed to stderr with a
// "session:" prefix), otherwise the code run returned. A zero code
// falls off the end of main and exits normally.
func main() {
        code, err := run()
        switch {
        case err != nil:
                _, _ = fmt.Fprintln(os.Stderr, "session:", err)
                os.Exit(2)
        case code != 0:
                os.Exit(code)
        }
}

// run parses the command line, executes the session suite, and returns
// the exit code derived from the resulting report.
//
// A help request (flag.ErrHelp) is treated as success. Any other flag
// error, an --output-file setup failure, or an error from
// runner.RunSessions is returned with code 0; main turns a non-nil
// error into exit status 2.
//
// SIGINT and SIGTERM cancel the context handed to the runner.
func run() (int, error) {
        cfg, err := parseFlags(os.Args[1:])
        if err != nil {
                if errors.Is(err, flag.ErrHelp) {
                        return 0, nil
                }
                return 0, err
        }

        out, cleanup, err := setupOutputFile(cfg.OutputFile)
        if err != nil {
                return 0, err
        }
        if cleanup != nil {
                defer cleanup()
        }

        // NotifyContext replaces a hand-rolled signal channel plus
        // goroutine: that goroutine stayed blocked on the channel forever
        // when no signal arrived. stop releases the signal registration
        // and the context together.
        ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
        defer stop()

        // A blank --profile means "no restriction": leave the list nil.
        var profiles []string
        if strings.TrimSpace(cfg.Profile) != "" {
                profiles = []string{cfg.Profile}
        }

        report, err := runner.RunSessions(ctx, runner.SessionOptions{
                SessionsPath:    cfg.Sessions,
                IncludeSelfTest: cfg.IncludeSelfTest,
                Harness: runner.HarnessOptions{
                        EngineBinary:   cfg.EngineBinary,
                        ConnectAddress: cfg.Connect,
                        // Engine output goes to stderr so stdout carries
                        // only the rendered report.
                        EngineStdout: os.Stderr,
                        EngineStderr: os.Stderr,
                },
                Profiles: profiles,
                Output:   cfg.Output,
                Out:      out,
                Err:      os.Stderr,
        })
        if err != nil {
                return 0, err
        }
        return report.ExitCode(), nil
}

// parseFlags parses the session CLI's arguments into a sessionConfig.
//
// Errors from the flag package are returned unchanged together with a
// zero config. --engine-binary and --connect are mutually exclusive:
// combining --connect with a binary path other than the built-in
// default is rejected, and whenever --connect is set the returned
// EngineBinary is empty.
func parseFlags(args []string) (sessionConfig, error) {
        const defaultEngineBinary = "./bin/emulator_main"

        fs := flag.NewFlagSet("session", flag.ContinueOnError)
        fs.SetOutput(os.Stderr)

        var (
                sessions     = fs.String("sessions", runner.DefaultSessionsDir, "session directory or single YAML")
                engineBinary = fs.String("engine-binary", defaultEngineBinary, "path to emulator_main")
                connect      = fs.String("connect", "", "HOST:PORT of a running gateway")
                profile      = fs.String("profile", "", "runtime profile (default: all known profiles)")
                output       = fs.String("output", "text", "output format: text or json")
                outputFile   = fs.String("output-file", "", "tee report to this file (atomic write)")
                selfTest     = fs.Bool("include-selftest", false, "run _-prefixed self-test session files")
        )
        if err := fs.Parse(args); err != nil {
                return sessionConfig{}, err
        }

        binary := *engineBinary
        if *connect != "" {
                // Only a non-default path counts as an explicit
                // --engine-binary; the default is silently dropped.
                if binary != "" && binary != defaultEngineBinary {
                        return sessionConfig{}, errors.New("--engine-binary and --connect are mutually exclusive")
                }
                binary = ""
        }

        return sessionConfig{
                Sessions:        *sessions,
                EngineBinary:    binary,
                Connect:         *connect,
                Profile:         *profile,
                Output:          *output,
                OutputFile:      *outputFile,
                IncludeSelfTest: *selfTest,
        }, nil
}

// setupOutputFile honors --output-file.
//
// When path is empty it returns os.Stdout and a nil cleanup. Otherwise
// it creates a temp file next to path and returns a writer that tees
// everything into both stdout and that temp file, plus a cleanup
// closure. The caller must run cleanup (normally via defer): it closes
// the temp file and renames it onto path, so the destination only ever
// appears fully written.
//
// A CreateTemp failure is returned as the error result. cleanup cannot
// return an error, so close and rename failures are reported on
// stderr; a failed rename also removes the temp file.
func setupOutputFile(path string) (io.Writer, func(), error) {
        if path == "" {
                return os.Stdout, nil, nil
        }
        // filepath.Dir never returns "" (a bare file name yields "."),
        // so its result can be handed to CreateTemp directly.
        tmp, err := os.CreateTemp(filepath.Dir(path), ".session-*.tmp")
        if err != nil {
                return nil, nil, fmt.Errorf("create --output-file tmp: %w", err)
        }
        tmpName := tmp.Name()
        cleanup := func() {
                // A failed close can mean buffered data never reached disk.
                // Surface it, but still publish whatever was written.
                if err := tmp.Close(); err != nil {
                        _, _ = fmt.Fprintln(os.Stderr, "session: close --output-file tmp:", err)
                }
                if err := os.Rename(tmpName, path); err != nil {
                        _, _ = fmt.Fprintln(os.Stderr, "session: rename --output-file:", err)
                        _ = os.Remove(tmpName)
                }
        }
        return io.MultiWriter(os.Stdout, tmp), cleanup, nil
}

package differential

import (
        "strings"
)

// DivergenceKind classifies emulator vs oracle outcomes for the differential lane.
// The string values are what ends up in the serialized "divergence" field of a Result.
type DivergenceKind string

const (
        // KindMatch: ClassifyDivergence found no crash or feature-gap text,
        // the success flags agree, and the diff is empty.
        KindMatch              DivergenceKind = "match"
        // KindFeatureGap: the combined message text matched one of the
        // isFeatureGap needles (e.g. "unimplemented").
        KindFeatureGap         DivergenceKind = "feature_gap"
        // KindSemanticDivergence: the success flags agree but a non-empty
        // diff was produced.
        KindSemanticDivergence DivergenceKind = "semantic_divergence"
        // KindErrorDivergence: exactly one of oracle and emulator succeeded.
        KindErrorDivergence    DivergenceKind = "error_divergence"
        // KindCrash: the combined message text matched one of the
        // isCrashSignal needles (e.g. "connection refused").
        KindCrash              DivergenceKind = "crash"
)

// ClassifyInput carries the signals ClassifyDivergence needs.
type ClassifyInput struct {
        // OracleSuccess and EmulatorSuccess are compared with each other;
        // a mismatch is reported as an error divergence.
        OracleSuccess   bool
        EmulatorSuccess bool
        // EmulatorStatus is carried along by callers; ClassifyDivergence
        // itself does not read it.
        EmulatorStatus  int
        // EmulatorBody, Diff and RunnerMessage are concatenated,
        // lowercased and scanned for crash and feature-gap needles.
        EmulatorBody    []byte
        // Diff additionally marks a semantic divergence when non-empty.
        Diff            string
        RunnerMessage   string
}

// ClassifyDivergence assigns a replay outcome to exactly one bucket.
//
// The runner message, diff and emulator body are joined and lowercased
// into one haystack. Checks then run in priority order and the first
// hit wins: crash text, feature-gap text, disagreeing success flags,
// a non-empty diff, and finally a match.
func ClassifyDivergence(in ClassifyInput) DivergenceKind {
        haystack := strings.ToLower(in.RunnerMessage + " " + in.Diff + " " + string(in.EmulatorBody))

        switch {
        case isCrashSignal(haystack):
                return KindCrash
        case isFeatureGap(haystack):
                return KindFeatureGap
        case in.OracleSuccess != in.EmulatorSuccess:
                return KindErrorDivergence
        case in.Diff != "":
                return KindSemanticDivergence
        default:
                return KindMatch
        }
}

// isFeatureGap reports whether msg contains any phrase that marks a
// not-yet-supported feature. Matching is a plain, case-sensitive
// substring search and every phrase is lowercase, so callers must
// lowercase msg first (ClassifyDivergence does).
func isFeatureGap(msg string) bool {
        for _, phrase := range [...]string{
                "unimplemented",
                "not implemented",
                "not yet implemented",
                "setoperationscan op is not union all",
                "withrefscan without active withscan bindings",
        } {
                if strings.Contains(msg, phrase) {
                        return true
                }
        }
        return false
}

// isCrashSignal reports whether msg contains any phrase that indicates
// the engine died or became unreachable. Matching is a plain,
// case-sensitive substring search and every phrase is lowercase, so
// callers must lowercase msg first (ClassifyDivergence does).
func isCrashSignal(msg string) bool {
        for _, phrase := range [...]string{
                "signal: killed",
                "signal: aborted",
                "engine process exited",
                "lost connection",
                "connection refused",
                "broken pipe",
                "segfault",
                "core dumped",
        } {
                if strings.Contains(msg, phrase) {
                        return true
                }
        }
        return false
}

// Package differential replays a corpus of SQL cases against the local
// emulator and diffs results against committed production-BigQuery oracles.
package differential

import (
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "sort"
        "strings"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
        "gopkg.in/yaml.v3"
)

// CorpusCase is one differential-lane YAML under conformance/differential/corpus/.
// It reuses the fixture setup/query schema but pins expectations in a separate
// oracle JSON file referenced by OracleRef.
//
// Name, Query and OracleRef are required; normalize rejects a case where any
// of them is blank. Unknown YAML keys are rejected by the loader.
type CorpusCase struct {
        Name           string   `yaml:"name"`
        Description    string   `yaml:"description,omitempty"`
        // Profiles defaults to the DuckDB profile when empty; every entry
        // must be a profile known to the runner.
        Profiles       []string `yaml:"profiles,omitempty"`
        // ProjectID defaults to "proj-diff-" plus a sanitized Name.
        ProjectID      string   `yaml:"project_id,omitempty"`
        DefaultDataset string   `yaml:"default_dataset,omitempty"`

        // OracleRef names the oracle JSON file (see LoadOracle).
        OracleRef       string               `yaml:"oracle_ref"`
        OracleSource    string               `yaml:"oracle_source,omitempty"`
        // Match is the fallback match mode used when the oracle itself
        // does not specify one.
        Match           runner.MatchMode     `yaml:"match,omitempty"`
        // KnownFailing downgrades a divergence for this case from a
        // failure to a skip.
        KnownFailing    bool                 `yaml:"known_failing,omitempty"`
        QueryParameters []QueryParameterYAML `yaml:"query_parameters,omitempty"`

        // Setup steps run before Query; each is validated at load time.
        Setup []runner.SetupStep `yaml:"setup,omitempty"`
        Query string             `yaml:"query"`

        // Path is the file the case was loaded from; it is set by the
        // loader and never read from YAML.
        Path string `yaml:"-"`
}

// QueryParameterYAML is the corpus-side spelling of a named query parameter.
// toWireParams converts it to the wire-level bqtypes.QueryParameter.
type QueryParameterYAML struct {
        Name             string                          `yaml:"name"`
        // Type is uppercased when converted to the wire type.
        Type             string                          `yaml:"type"`
        // Value is the parameter's value, carried as a string.
        Value            string                          `yaml:"value,omitempty"`
        // ArrayElementType, when non-blank, makes the wire type carry an
        // array element type.
        ArrayElementType string                          `yaml:"array_element_type,omitempty"`
        // ArrayValues presumably holds the array elements that go with
        // ArrayElementType; confirm against toWireParams.
        ArrayValues      []string                        `yaml:"array_values,omitempty"`
        // StructFields and StructValues presumably describe a STRUCT
        // parameter's field types and per-field values; confirm against
        // toWireParams.
        StructFields     []QueryParameterStructFieldYAML `yaml:"struct_fields,omitempty"`
        StructValues     map[string]string               `yaml:"struct_values,omitempty"`
}

// QueryParameterStructFieldYAML names one STRUCT parameter field: its
// field name and the type of that field, as spelled in the corpus YAML.
type QueryParameterStructFieldYAML struct {
        Name string `yaml:"name"`
        Type string `yaml:"type"`
}

// Default locations of the committed differential-lane inputs. Both
// are slash-separated relative paths.
const (
        // DefaultCorpusDir is the committed corpus root.
        DefaultCorpusDir = "conformance/differential/corpus"

        // DefaultOracleDir is the committed oracle JSON root.
        DefaultOracleDir = "conformance/differential/oracle"
)

// LoadCorpus reads the corpus YAML file at path and returns the parsed,
// validated case. A read failure is wrapped as "read <path>: ...";
// parse and validation failures come from loadCorpusBytes.
func LoadCorpus(path string) (*CorpusCase, error) {
        raw, readErr := os.ReadFile(path) //nolint:gosec // path is CLI-controlled
        if readErr != nil {
                return nil, fmt.Errorf("read %s: %w", path, readErr)
        }
        return loadCorpusBytes(raw, path)
}

// loadCorpusBytes decodes one corpus case from data. path is used only
// for error messages and to populate the case's Path field.
//
// Decoding is strict: YAML keys that do not map to a CorpusCase field
// are an error. After decoding, the case is normalized (defaults
// applied, required fields checked); any failure is wrapped with the
// path.
func loadCorpusBytes(data []byte, path string) (*CorpusCase, error) {
        decoder := yaml.NewDecoder(strings.NewReader(string(data)))
        decoder.KnownFields(true)

        parsed := &CorpusCase{}
        if err := decoder.Decode(parsed); err != nil {
                return nil, fmt.Errorf("parse %s: %w", path, err)
        }

        parsed.Path = path
        if err := parsed.normalize(); err != nil {
                return nil, fmt.Errorf("validate %s: %w", path, err)
        }
        return parsed, nil
}

// LoadCorpusDir loads every corpus case under pathOrDir.
//
// When pathOrDir is a regular file it is loaded as a single case.
// Otherwise the directory tree is walked and every *.yaml / *.yml file
// (extension matched case-insensitively) is loaded. Files and
// directories whose basename starts with "_" are skipped unless
// includeSelfTest is true (unit/self-test lane). The walk root itself
// is never skipped, whatever its name.
//
// The result is sorted by file path. The first stat, walk, or load
// error aborts the whole call.
func LoadCorpusDir(pathOrDir string, includeSelfTest bool) ([]*CorpusCase, error) {
        info, err := os.Stat(pathOrDir)
        if err != nil {
                return nil, fmt.Errorf("stat %s: %w", pathOrDir, err)
        }
        if !info.IsDir() {
                c, err := LoadCorpus(pathOrDir)
                if err != nil {
                        return nil, err
                }
                return []*CorpusCase{c}, nil
        }

        var cases []*CorpusCase
        err = filepath.Walk(pathOrDir, func(p string, fi os.FileInfo, visitErr error) error {
                if visitErr != nil {
                        return visitErr
                }
                hidden := strings.HasPrefix(filepath.Base(p), "_") && !includeSelfTest
                if fi.IsDir() {
                        // Identify the root by path, not by basename: Walk
                        // passes the root exactly as given, and a nested
                        // directory may share the root's basename yet must
                        // still be skipped.
                        if hidden && p != pathOrDir {
                                return filepath.SkipDir
                        }
                        return nil
                }
                switch strings.ToLower(filepath.Ext(p)) {
                case ".yaml", ".yml":
                default:
                        return nil
                }
                if hidden {
                        return nil
                }
                c, loadErr := LoadCorpus(p)
                if loadErr != nil {
                        return loadErr
                }
                cases = append(cases, c)
                return nil
        })
        if err != nil {
                return nil, err
        }
        sort.Slice(cases, func(i, j int) bool { return cases[i].Path < cases[j].Path })
        return cases, nil
}

// normalize validates a freshly decoded case and fills in defaults.
//
// It requires non-blank name, query and oracle_ref (checked in that
// order), derives ProjectID from the name when unset, defaults
// Profiles to the DuckDB profile, rejects profiles the runner does not
// know, and validates each setup step. The first problem found is
// returned.
func (c *CorpusCase) normalize() error {
        required := []struct{ value, label string }{
                {c.Name, "name"},
                {c.Query, "query"},
                {c.OracleRef, "oracle_ref"},
        }
        for _, field := range required {
                if strings.TrimSpace(field.value) == "" {
                        return errors.New(field.label + " is required")
                }
        }

        if c.ProjectID == "" {
                c.ProjectID = "proj-diff-" + sanitizeID(c.Name)
        }
        if len(c.Profiles) == 0 {
                c.Profiles = []string{runner.ProfileDuckDB}
        }

        available := runner.KnownProfiles()
        known := make(map[string]bool, len(available))
        for _, profile := range available {
                known[profile.Name] = true
        }
        for _, name := range c.Profiles {
                if !known[name] {
                        return fmt.Errorf("unknown profile %q", name)
                }
        }

        for idx, step := range c.Setup {
                if err := step.ValidateExported(); err != nil {
                        return fmt.Errorf("setup[%d]: %w", idx, err)
                }
        }
        return nil
}

// sanitizeID lowercases s and replaces every rune that is not an ASCII
// lowercase letter or digit with '-'. The result therefore contains
// only [a-z0-9-] and has one output rune per rune of the lowercased
// input.
func sanitizeID(s string) string {
        keepOrDash := func(r rune) rune {
                if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') {
                        return r
                }
                return '-'
        }
        return strings.Map(keepOrDash, strings.ToLower(s))
}

package differential

import (
        "encoding/json"
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// OracleError is the recorded production BigQuery error envelope.
// An error oracle must carry a non-zero Code or a non-empty Message
// (enforced by Oracle.validate).
type OracleError struct {
        // Code becomes the expected error code when it is non-zero.
        Code    int    `json:"code"`
        // Message becomes the substring the emulator's error must contain.
        Message string `json:"message"`
}

// Oracle is a committed production-BigQuery capture for one corpus case.
// Rows/schema mirror bqtypes.QueryResponse so the fixture comparator is reused.
//
// validate requires CapturedAt and Project. A success oracle must also
// carry at least one schema field; a failure oracle must carry Error.
type Oracle struct {
        // CapturedAt is stamped by WriteOracle with the current UTC time
        // in RFC 3339 form when left empty.
        CapturedAt   string           `json:"captured_at"`
        Project      string           `json:"project"`
        // JobID is copied from the job reference, when one is present, by
        // OracleFromQueryResponse.
        JobID        string           `json:"job_id,omitempty"`
        OracleSource string           `json:"oracle_source,omitempty"`
        // Match, when set, takes precedence over the corpus case's match mode.
        Match        runner.MatchMode `json:"match,omitempty"`
        // Success selects which half of the payload applies:
        // Schema/Rows when true, Error when false.
        Success      bool             `json:"success"`

        Schema       *bqtypes.TableSchema  `json:"schema,omitempty"`
        Rows         []bqtypes.Row         `json:"rows,omitempty"`
        JobReference *bqtypes.JobReference `json:"jobReference,omitempty"`
        Error        *OracleError          `json:"error,omitempty"`
}

// LoadOracle reads, decodes and validates the oracle JSON named by ref.
//
// A ref without any OS path separator is treated as a file name inside
// oracleDir. A ref that contains a separator is used as a path exactly
// as given. Read, parse and validation failures are wrapped with the
// resolved path.
func LoadOracle(oracleDir, ref string) (*Oracle, error) {
        path := ref
        if !strings.ContainsRune(ref, os.PathSeparator) {
                path = filepath.Join(oracleDir, ref)
        }

        raw, err := os.ReadFile(path) //nolint:gosec // oracle dir is CLI-controlled
        if err != nil {
                return nil, fmt.Errorf("read oracle %s: %w", path, err)
        }

        oracle := new(Oracle)
        if err := json.Unmarshal(raw, oracle); err != nil {
                return nil, fmt.Errorf("parse oracle %s: %w", path, err)
        }
        if err := oracle.validate(); err != nil {
                return nil, fmt.Errorf("validate oracle %s: %w", path, err)
        }
        return oracle, nil
}

// validate checks that the oracle is internally consistent and returns
// the first problem found: captured_at and project are always
// required; a success oracle needs at least one schema field; a
// failure oracle needs an error block with a code or a message.
func (o *Oracle) validate() error {
        switch {
        case o.CapturedAt == "":
                return errors.New("captured_at is required")
        case o.Project == "":
                return errors.New("project is required")
        case o.Success:
                if o.Schema == nil || len(o.Schema.Fields) == 0 {
                        return errors.New("success oracle requires schema.fields")
                }
                return nil
        case o.Error == nil:
                return errors.New("error oracle requires error block")
        case o.Error.Code == 0 && o.Error.Message == "":
                return errors.New("error oracle requires code or message")
        }
        return nil
}

// ExpectationFromOracle turns an oracle into the runner Expectation
// consumed by CompareRows / CompareError.
//
// The match mode is the oracle's own when set, else caseMatch, else
// runner.MatchOrdered. A failure oracle yields an error expectation
// built from its message and code. A success oracle yields one map per
// row, keyed by the schema's column names; cells beyond the schema are
// keyed "col<index>".
//
// o.Error must be non-nil when o.Success is false (LoadOracle
// guarantees this for loaded oracles).
func ExpectationFromOracle(o *Oracle, caseMatch runner.MatchMode) runner.Expectation {
        match := runner.MatchOrdered
        switch {
        case o.Match != "":
                match = o.Match
        case caseMatch != "":
                match = caseMatch
        }

        if !o.Success {
                // A zero code is the ExpectedError zero value anyway, so it
                // can be copied across unconditionally.
                wantErr := runner.ExpectedError{
                        MessageContains: o.Error.Message,
                        Code:            o.Error.Code,
                }
                return runner.Expectation{Match: match, Error: &wantErr}
        }

        columns := schemaColumns(o.Schema)
        rows := make([]map[string]any, len(o.Rows))
        for rowIdx, wireRow := range o.Rows {
                fields := make(map[string]any, len(wireRow.F))
                for colIdx, cell := range wireRow.F {
                        fields[positionalName(columns, colIdx)] = oracleCellValue(cell.V)
                }
                rows[rowIdx] = fields
        }
        return runner.Expectation{Match: match, Rows: rows}
}

// WriteOracle atomically writes o as indented JSON (with a trailing
// newline) to path.
//
// The data is first written to a temp file in path's directory and
// then renamed into place, so readers never observe a half-written
// oracle. If o.CapturedAt is empty it is stamped, on o itself, with
// the current UTC time in RFC 3339 form before encoding.
//
// It returns an error when o is nil or when encoding, writing, closing
// or renaming fails. On every failure after the temp file exists, the
// temp file is removed so no stray ".oracle-*.tmp" is left behind.
func WriteOracle(path string, o *Oracle) error {
        if o == nil {
                return errors.New("write oracle: nil oracle")
        }
        if o.CapturedAt == "" {
                o.CapturedAt = time.Now().UTC().Format(time.RFC3339)
        }
        data, err := json.MarshalIndent(o, "", "  ")
        if err != nil {
                return fmt.Errorf("encode oracle %s: %w", path, err)
        }
        data = append(data, '\n')

        tmp, err := os.CreateTemp(filepath.Dir(path), ".oracle-*.tmp")
        if err != nil {
                return fmt.Errorf("create oracle tmp for %s: %w", path, err)
        }
        tmpName := tmp.Name()
        if _, err := tmp.Write(data); err != nil {
                _ = tmp.Close()
                _ = os.Remove(tmpName)
                return fmt.Errorf("write oracle tmp %s: %w", tmpName, err)
        }
        if err := tmp.Close(); err != nil {
                _ = os.Remove(tmpName)
                return fmt.Errorf("close oracle tmp %s: %w", tmpName, err)
        }
        if err := os.Rename(tmpName, path); err != nil {
                // Best-effort removal: the rename error is the one worth
                // reporting, and the temp file must not linger next to the
                // committed oracles.
                _ = os.Remove(tmpName)
                return fmt.Errorf("rename oracle into %s: %w", path, err)
        }
        return nil
}

// oracleCellValue converts one wire cell into the value stored in an
// expectation row. Every kind of value (nil, string, or anything
// else) currently passes through unchanged; the cases are kept
// separate so a per-type conversion has an obvious home.
func oracleCellValue(v any) any {
        switch cell := v.(type) {
        case nil:
                return nil
        case string:
                return cell
        default:
                return cell
        }
}

// schemaColumns returns the schema's field names in declaration order.
// A nil schema yields nil; a schema with no fields yields an empty,
// non-nil slice.
func schemaColumns(schema *bqtypes.TableSchema) []string {
        if schema == nil {
                return nil
        }
        names := make([]string, 0, len(schema.Fields))
        for _, field := range schema.Fields {
                names = append(names, field.Name)
        }
        return names
}

// positionalName returns the column name for cell index i: cols[i]
// when the schema covers that position, otherwise the synthetic name
// "col<i>".
func positionalName(cols []string, i int) string {
        if i >= len(cols) {
                return fmt.Sprintf("col%d", i)
        }
        return cols[i]
}

// OracleFromQueryResponse decodes body as a gateway QueryResponse and
// wraps it in a success oracle for project, tagged with source and
// match. Schema, rows and job reference are carried over as decoded;
// JobID is filled in only when the response has a job reference.
// CapturedAt is left empty. A body that is not valid JSON for a
// QueryResponse is an error.
func OracleFromQueryResponse(project string, source string, match runner.MatchMode, body []byte) (*Oracle, error) {
        var resp bqtypes.QueryResponse
        if err := json.Unmarshal(body, &resp); err != nil {
                return nil, fmt.Errorf("decode QueryResponse: %w", err)
        }

        var jobID string
        if ref := resp.JobReference; ref != nil {
                jobID = ref.JobID
        }

        return &Oracle{
                Project:      project,
                JobID:        jobID,
                OracleSource: source,
                Match:        match,
                Success:      true,
                Schema:       resp.Schema,
                Rows:         resp.Rows,
                JobReference: resp.JobReference,
        }, nil
}

package differential

import (
        "context"
        "encoding/json"
        "fmt"
        "io"
        "os"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// JSONSchemaVersion is the value Run stamps into Report.SchemaVersion
// (serialized as "schema_version").
const JSONSchemaVersion = 1

// Options configures a differential replay invocation.
// Zero-valued fields are filled in by normalizeOptions.
type Options struct {
        // CorpusDir is the corpus directory or single file to replay;
        // defaults to DefaultCorpusDir.
        CorpusDir       string
        // OracleDir is where bare oracle refs are resolved; defaults to
        // DefaultOracleDir.
        OracleDir       string
        // IncludeSelfTest also loads "_"-prefixed corpus files and directories.
        IncludeSelfTest bool
        // Harness is handed to runner.StartEmulator for every case.
        Harness         runner.HarnessOptions
        // Profile is the single profile to run under; defaults to the
        // DuckDB profile and must be known to the runner.
        Profile         string
        // Output defaults to "text".
        Output          string
        // Out defaults to os.Stdout and Err to os.Stderr.
        Out             io.Writer
        Err             io.Writer
}

// Result is one corpus case outcome.
type Result struct {
        Case         string         `json:"case"`
        Path         string         `json:"path"`
        Profile      string         `json:"profile"`
        // Status starts as a failure and is only changed to pass on a
        // match, or to skip for a known-failing divergence.
        Status       runner.Status  `json:"status"`
        Divergence   DivergenceKind `json:"divergence,omitempty"`
        KnownFailing bool           `json:"known_failing,omitempty"`
        // OracleSource comes from the corpus case, falling back to the
        // oracle file when the case leaves it empty.
        OracleSource string         `json:"oracle_source,omitempty"`
        // DurationMs is the wall-clock time spent on the case.
        DurationMs   int64          `json:"duration_ms"`
        Message      string         `json:"message,omitempty"`
        Diff         string         `json:"diff,omitempty"`
}

// Report aggregates a differential lane run.
type Report struct {
        // SchemaVersion is set to JSONSchemaVersion by Run.
        SchemaVersion int            `json:"schema_version"`
        // Summary counts total, passed, failed and skipped cases.
        Summary       runner.Summary `json:"summary"`
        // Results holds one entry per corpus case, in the order the
        // cases were run.
        Results       []Result       `json:"results"`
}

// ExitCode maps the report to a process exit status, mirroring the
// fixture runner: 2 for a nil report, 1 when at least one case failed,
// and 0 otherwise.
func (r *Report) ExitCode() int {
        switch {
        case r == nil:
                return 2
        case r.Summary.Failed > 0:
                return 1
        default:
                return 0
        }
}

// Run replays every corpus case against the emulator, diffs each
// response against its committed oracle, renders the report, and
// returns it.
//
// Corpus loading errors and an unknown profile abort the run with a
// nil report. Individual case failures never abort the run; they are
// recorded in the report. If rendering fails, the completed report is
// still returned alongside the error.
func Run(ctx context.Context, opts Options) (*Report, error) {
        normalizeOptions(&opts)

        cases, err := LoadCorpusDir(opts.CorpusDir, opts.IncludeSelfTest)
        if err != nil {
                return nil, err
        }
        profile, known := runner.LookupProfile(opts.Profile)
        if !known {
                return nil, fmt.Errorf("unknown profile %q", opts.Profile)
        }

        report := &Report{SchemaVersion: JSONSchemaVersion}
        tally := &report.Summary
        for _, corpusCase := range cases {
                outcome := runCase(ctx, opts, profile, corpusCase)
                report.Results = append(report.Results, outcome)

                tally.Total++
                switch outcome.Status {
                case runner.StatusPass:
                        tally.Passed++
                case runner.StatusFail:
                        tally.Failed++
                case runner.StatusSkip:
                        tally.Skipped++
                }
        }

        if err := renderReport(opts, report); err != nil {
                return report, err
        }
        return report, nil
}

// normalizeOptions fills every unset field of opts with its default:
// the committed corpus and oracle directories, the DuckDB profile,
// "text" output, and the process's stdout and stderr.
func normalizeOptions(opts *Options) {
        orDefault := func(field *string, fallback string) {
                if *field == "" {
                        *field = fallback
                }
        }
        orDefault(&opts.CorpusDir, DefaultCorpusDir)
        orDefault(&opts.OracleDir, DefaultOracleDir)
        orDefault(&opts.Profile, runner.ProfileDuckDB)
        orDefault(&opts.Output, "text")

        if opts.Out == nil {
                opts.Out = os.Stdout
        }
        if opts.Err == nil {
                opts.Err = os.Stderr
        }
}

// runCase replays a single corpus case under profile and returns its
// Result. It never returns an error: every failure is folded into the
// Result's Message and Divergence.
//
// Steps, in order: load the oracle, start an emulator, run the case's
// setup steps, send the query, then hand the response to
// compareAgainstOracle. Failures to load the oracle, start the
// emulator, or build the request body are always reported as failed
// with KindCrash. Setup and query transport failures are classified
// from their message and may be downgraded to a skip when the case is
// marked known_failing.
func runCase(ctx context.Context, opts Options, profile runner.Profile, c *CorpusCase) Result {
        started := time.Now()
        // Start pessimistic: the status stays "fail" unless a later step
        // explicitly marks the case passed or skipped.
        res := Result{
                Case:         c.Name,
                Path:         c.Path,
                Profile:      profile.Name,
                Status:       runner.StatusFail,
                KnownFailing: c.KnownFailing,
                OracleSource: c.OracleSource,
        }

        oracle, err := LoadOracle(opts.OracleDir, c.OracleRef)
        if err != nil {
                res.Message = "load oracle: " + err.Error()
                res.Divergence = KindCrash
                return finish(res, started)
        }
        // The case's own oracle_source wins; fall back to the oracle file's.
        if res.OracleSource == "" {
                res.OracleSource = oracle.OracleSource
        }

        env, startErr := runner.StartEmulator(ctx, opts.Harness, profile)
        if startErr != nil {
                res.Message = "start emulator: " + startErr.Error()
                res.Divergence = KindCrash
                return finish(res, started)
        }
        // The emulator environment is per case; its Close error is ignored.
        defer func() { _ = env.Close() }()

        base := env.BaseURL + "/bigquery/v2/projects/" + c.ProjectID
        if setupErr := runner.RunSetupSteps(ctx, base, env.DataDir(), c.Setup, c.DefaultDataset); setupErr != nil {
                res.Message = setupErr.Error()
                res.Divergence = ClassifyDivergence(ClassifyInput{RunnerMessage: res.Message})
                return finishMaybeKnown(res, started, c.KnownFailing)
        }

        params := toWireParams(c.QueryParameters)
        queryBody, err := runner.MarshalJobsQueryBody(c.Query, c.DefaultDataset, params)
        if err != nil {
                res.Message = err.Error()
                res.Divergence = KindCrash
                return finish(res, started)
        }
        status, body, queryErr := runner.DoRequest(ctx, base+"/queries", queryBody)
        if queryErr != nil {
                res.Message = "query rpc: " + queryErr.Error()
                res.Divergence = ClassifyDivergence(ClassifyInput{RunnerMessage: res.Message})
                return finishMaybeKnown(res, started, c.KnownFailing)
        }
        return compareAgainstOracle(res, oracle, c, status, body, started)
}

// compareAgainstOracle judges the emulator's HTTP response for one
// case against its oracle and returns the completed Result.
//
// An error oracle is delegated to compareErrorOracle. For a success
// oracle, a non-2xx status or a row mismatch is a divergence
// (downgraded to a skip when the case is known_failing), an
// undecodable body is a hard failure classified as KindCrash, and
// matching rows pass.
func compareAgainstOracle(
        res Result,
        oracle *Oracle,
        c *CorpusCase,
        status int,
        body []byte,
        started time.Time,
) Result {
        exp := ExpectationFromOracle(oracle, c.Match)
        emulatorOK := status >= 200 && status < 300

        if exp.Error != nil {
                return compareErrorOracle(res, exp, c, status, body, emulatorOK, started)
        }

        // classify reads res.Message and res.Diff at call time, so both
        // must be set before it is invoked.
        classify := func(emulatorSuccess bool) DivergenceKind {
                return ClassifyDivergence(ClassifyInput{
                        OracleSuccess:   true,
                        EmulatorSuccess: emulatorSuccess,
                        EmulatorStatus:  status,
                        EmulatorBody:    body,
                        Diff:            res.Diff,
                        RunnerMessage:   res.Message,
                })
        }

        if !emulatorOK {
                res.Message = fmt.Sprintf("query failed with HTTP %d", status)
                res.Diff = "body: " + snippet(body)
                res.Divergence = classify(false)
                return finishMaybeKnown(res, started, c.KnownFailing)
        }

        var resp bqtypes.QueryResponse
        if err := json.Unmarshal(body, &resp); err != nil {
                res.Message = "decode QueryResponse: " + err.Error()
                res.Divergence = KindCrash
                return finish(res, started)
        }

        diff := runner.CompareRows(exp, resp.Schema, resp.Rows)
        if diff == "" {
                res.Status = runner.StatusPass
                res.Divergence = KindMatch
                return finish(res, started)
        }

        res.Message = "row mismatch"
        res.Diff = diff
        res.Divergence = classify(true)
        return finishMaybeKnown(res, started, c.KnownFailing)
}

// compareErrorOracle handles cases whose oracle expects an error. The case
// passes only when the emulator also failed and runner.CompareError reports
// no difference; every other outcome is classified and routed through
// finishMaybeKnown.
func compareErrorOracle(
        res Result,
        exp runner.Expectation,
        c *CorpusCase,
        status int,
        body []byte,
        emulatorSuccess bool,
        started time.Time,
) Result {
        if !emulatorSuccess {
                diff := runner.CompareError(*exp.Error, status, body)
                if diff == "" {
                        res.Status = runner.StatusPass
                        res.Divergence = KindMatch
                        return finish(res, started)
                }
                res.Message = "error mismatch"
                res.Diff = diff
        } else {
                res.Message = "expected error, got success"
                res.Diff = fmt.Sprintf("status: %d\nbody: %s", status, snippet(body))
        }

        res.Divergence = ClassifyDivergence(ClassifyInput{
                OracleSuccess:   false,
                EmulatorSuccess: emulatorSuccess,
                EmulatorStatus:  status,
                EmulatorBody:    body,
                Diff:            res.Diff,
                RunnerMessage:   res.Message,
        })
        return finishMaybeKnown(res, started, c.KnownFailing)
}

// finishMaybeKnown stamps the duration via finish and, when the case is
// marked known-failing and the result is a failure, downgrades it to a skip
// with a message prefix. All other results are returned unchanged.
func finishMaybeKnown(res Result, started time.Time, knownFailing bool) Result {
        out := finish(res, started)
        if !knownFailing || out.Status != runner.StatusFail {
                return out
        }
        out.Status = runner.StatusSkip
        out.Message = "known_failing (expected divergence): " + out.Message
        return out
}

// finish records the wall-clock time elapsed since started, in
// milliseconds, on the result and returns it.
func finish(res Result, started time.Time) Result {
        elapsed := time.Since(started)
        res.DurationMs = elapsed.Milliseconds()
        return res
}

// toWireParams converts YAML-declared query parameters into their wire
// representation. Type names are upper-cased. When array or struct values
// are supplied, the scalar Value is cleared so only the composite value is
// sent. Returns nil for an empty input.
func toWireParams(params []QueryParameterYAML) []bqtypes.QueryParameter {
        if len(params) == 0 {
                return nil
        }
        wire := make([]bqtypes.QueryParameter, 0, len(params))
        for _, p := range params {
                // Type side: scalar type plus optional array element / struct field types.
                typ := &bqtypes.QueryParameterType{Type: strings.ToUpper(p.Type)}
                if elem := strings.TrimSpace(p.ArrayElementType); elem != "" {
                        typ.ArrayType = &bqtypes.QueryParameterType{Type: strings.ToUpper(elem)}
                }
                for _, field := range p.StructFields {
                        typ.StructTypes = append(typ.StructTypes, bqtypes.QueryParameterStructType{
                                Name: field.Name,
                                Type: bqtypes.QueryParameterType{Type: strings.ToUpper(field.Type)},
                        })
                }

                // Value side: scalar by default, replaced by composite values when present.
                val := &bqtypes.QueryParameterValue{Value: p.Value}
                if n := len(p.ArrayValues); n > 0 {
                        val.ArrayValues = make([]bqtypes.QueryParameterValue, n)
                        for i, elem := range p.ArrayValues {
                                val.ArrayValues[i] = bqtypes.QueryParameterValue{Value: elem}
                        }
                }
                if n := len(p.StructValues); n > 0 {
                        val.StructValues = make(map[string]bqtypes.QueryParameterValue, n)
                        for name, member := range p.StructValues {
                                val.StructValues[name] = bqtypes.QueryParameterValue{Value: member}
                        }
                }
                if val.ArrayValues != nil || val.StructValues != nil {
                        val.Value = ""
                }

                wire = append(wire, bqtypes.QueryParameter{
                        Name:           p.Name,
                        ParameterType:  typ,
                        ParameterValue: val,
                })
        }
        return wire
}

// snippet returns b as a whitespace-trimmed string, truncated to at most
// 240 bytes with a "..." suffix when it is longer. The cut is moved back to
// a UTF-8 rune boundary so a multi-byte character is never split, which
// keeps the text valid when it is embedded in a text or JSON report.
func snippet(b []byte) string {
        const limit = 240
        s := strings.TrimSpace(string(b))
        if len(s) <= limit {
                return s
        }
        cut := limit
        // s[cut] is the first excluded byte. If it is a continuation byte
        // (10xxxxxx) the rune started earlier; a rune has at most three
        // continuation bytes, so three steps back are always enough.
        for back := 0; back < 3 && cut > 0 && s[cut]&0xC0 == 0x80; back++ {
                cut--
        }
        return s[:cut] + "..."
}

// renderReport writes the report to opts.Out: indented JSON when
// opts.Output is "json", the plain-text rendering for any other value.
func renderReport(opts Options, report *Report) error {
        if opts.Output != "json" {
                return renderText(opts.Out, report)
        }
        encoder := json.NewEncoder(opts.Out)
        encoder.SetIndent("", "  ")
        return encoder.Encode(report)
}

// renderText prints a one-line summary followed by one line per result
// (status, case, divergence and, when set, the message), with the diff on
// the following line when present. Write errors are deliberately ignored
// and the function always returns nil.
func renderText(w io.Writer, report *Report) error {
        _, _ = fmt.Fprintf(
                w,
                "differential summary: %d total, %d passed, %d failed, %d skipped\n",
                report.Summary.Total,
                report.Summary.Passed,
                report.Summary.Failed,
                report.Summary.Skipped,
        )
        for _, r := range report.Results {
                head := fmt.Sprintf("%s %s %s", r.Status, r.Case, r.Divergence)
                if r.Message == "" {
                        _, _ = fmt.Fprintln(w, head)
                } else {
                        _, _ = fmt.Fprintln(w, head+" — "+r.Message)
                }
                if r.Diff == "" {
                        continue
                }
                _, _ = fmt.Fprintln(w, r.Diff)
        }
        return nil
}

package googlesqlcorpus

import (
        "errors"
        "fmt"
        "strconv"
        "strings"
)

// Literal spellings of the non-finite float values that parseValue
// recognizes in an expected-result stanza. parseValue returns these
// strings as the cell value; normalizeExpectedCell maps corpusValNaN to
// "NaN" and leaves the two infinities unchanged.
const (
        corpusValNaN  = "nan"
        corpusValInf  = "inf"
        corpusValNInf = "-inf"
)

// ExpectedResult is the parsed ARRAY<...>[...] expectation from a
// .test case.
type ExpectedResult struct {
        // Ordered is true when the stanza carries the "known order:" marker
        // right after the opening bracket.
        Ordered bool
        // Rows holds one positional cell slice per {...} row. Cells are nil
        // (NULL), bool, string (including tagged and bare numeric tokens), or
        // a nested []any for a nested {...} value.
        Rows [][]any
}

// ParseExpected parses the GoogleSQL compliance result stanza. It rejects
// an empty (all-whitespace) input and any non-whitespace text left after
// the closing bracket of the top-level array.
func ParseExpected(raw string) (ExpectedResult, error) {
        trimmed := strings.TrimSpace(raw)
        if trimmed == "" {
                return ExpectedResult{}, errors.New("empty expected block")
        }

        parser := &expectedParser{s: trimmed}
        rows, ordered, err := parser.parseTopArray()
        if err != nil {
                return ExpectedResult{}, err
        }

        if parser.pos < len(parser.s) {
                if rest := strings.TrimSpace(parser.s[parser.pos:]); rest != "" {
                        return ExpectedResult{}, fmt.Errorf("trailing input at %d", parser.pos)
                }
        }
        return ExpectedResult{Ordered: ordered, Rows: rows}, nil
}

// expectedParser is a hand-written cursor over an expected-result stanza.
type expectedParser struct {
        s   string // full input being parsed
        pos int    // byte offset of the next unread character in s
}

// parseTopArray parses the outer `ARRAY<type>[ ... ]` wrapper. The type
// expression is skipped, not interpreted. It returns the parsed rows and
// whether the "known order:" marker was present.
func (p *expectedParser) parseTopArray() ([][]any, bool, error) {
        // expect consumes tok (after leading whitespace) or reports where it
        // was missing.
        expect := func(tok string) error {
                if p.consume(tok) {
                        return nil
                }
                return fmt.Errorf("expected %s at %d", tok, p.pos)
        }

        if err := expect("ARRAY<"); err != nil {
                return nil, false, err
        }
        if err := p.skipTypeExpr(); err != nil {
                return nil, false, err
        }
        if err := expect(">"); err != nil {
                return nil, false, err
        }
        if err := expect("["); err != nil {
                return nil, false, err
        }

        ordered := p.consume("known order:")
        rows, err := p.parseRows()
        if err != nil {
                return nil, false, err
        }
        if err := expect("]"); err != nil {
                return nil, false, err
        }
        return rows, ordered, nil
}

// skipTypeExpr advances past a type expression whose opening '<' has
// already been consumed, tracking nested angle brackets. On success the
// cursor rests on the matching '>' without consuming it. If the input ends
// first, an error is returned.
func (p *expectedParser) skipTypeExpr() error {
        for depth := 1; p.pos < len(p.s); p.pos++ {
                switch p.s[p.pos] {
                case '<':
                        depth++
                case '>':
                        depth--
                        if depth == 0 {
                                // Leave the closing '>' for the caller.
                                return nil
                        }
                }
        }
        return fmt.Errorf("unterminated type expr at %d", p.pos)
}

// parseRows parses a comma-separated list of {...} rows up to, but not
// including, the closing ']'. An empty list returns nil rows. Reaching the
// end of input after a row also ends the list; the caller reports the
// missing ']'.
func (p *expectedParser) parseRows() ([][]any, error) {
        if p.skipSpace(); p.peek() == ']' {
                return nil, nil
        }
        var rows [][]any
        for {
                p.skipSpace()
                row, err := p.parseRow()
                if err != nil {
                        return nil, err
                }
                rows = append(rows, row)

                p.skipSpace()
                switch next := p.peek(); {
                case next == ']' || next == 0:
                        return rows, nil
                case !p.consume(","):
                        return nil, fmt.Errorf("expected , between rows at %d", p.pos)
                }
        }
}

// parseRow parses one `{cell, cell, ...}` group and returns its cells in
// order. An empty `{}` yields a nil slice.
func (p *expectedParser) parseRow() ([]any, error) {
        if !p.consume("{") {
                return nil, fmt.Errorf("expected { at %d", p.pos)
        }
        var cells []any
        for first := true; ; first = false {
                // consume skips leading whitespace itself.
                if p.consume("}") {
                        return cells, nil
                }
                // Every cell after the first must be preceded by a comma.
                if !first && !p.consume(",") {
                        return nil, fmt.Errorf("expected , between cells at %d", p.pos)
                }
                cell, err := p.parseValue()
                if err != nil {
                        return nil, err
                }
                cells = append(cells, cell)
        }
}

// parseValue parses a single cell. Alternatives are tried in a fixed order:
// nested {...} group, NULL, true/false, nan/-inf/inf, quoted or bytes
// literal, typed date/time literal, and finally a bare token starting with
// a digit or '-'. NULL is returned as a nil value.
func (p *expectedParser) parseValue() (any, error) {
        p.skipSpace()
        if p.peek() == '{' {
                nested, err := p.parseRow()
                if err != nil {
                        return nil, err
                }
                return nested, nil
        }

        // Keyword literals; "-inf" must be tried before "inf" is irrelevant
        // here (different first byte) but the original order is kept.
        switch {
        case p.consume("NULL"):
                return nil, nil
        case p.consume("true"):
                return true, nil
        case p.consume("false"):
                return false, nil
        case p.consume(corpusValNaN):
                return corpusValNaN, nil
        case p.consume(corpusValNInf):
                return corpusValNInf, nil
        case p.consume(corpusValInf):
                return corpusValInf, nil
        }

        if val, matched, err := p.tryParseQuotedValue(); matched || err != nil {
                return val, err
        }
        if val, matched, err := p.tryParseTypedLiteral(); matched || err != nil {
                return val, err
        }

        if next := p.peek(); next == '-' || isDigit(next) {
                return p.readBareToken(), nil
        }
        return nil, fmt.Errorf("unexpected value at %d", p.pos)
}

// tryParseQuotedValue parses a bytes literal (b"..." or b'...') or a string
// quoted with ", ' or `. Bytes literals are returned with a "b:" prefix.
// The bool result reports whether a quoted form was recognized; it is true
// even when reading the body then fails.
func (p *expectedParser) tryParseQuotedValue() (any, bool, error) {
        if p.consume("b\"") || p.consume("b'") {
                // The byte just consumed is the opening quote character.
                body, err := p.readQuoted(p.s[p.pos-1])
                if err != nil {
                        return nil, true, err
                }
                return "b:" + body, true, nil
        }

        switch quote := p.peek(); quote {
        case '"', '\'', '`':
                p.pos++
                body, err := p.readQuoted(quote)
                if err != nil {
                        return nil, true, err
                }
                return body, true, nil
        }
        return nil, false, nil
}

// tryParseTypedLiteral parses `DATE x`, `TIMESTAMP x`, `TIME x` or
// `DATETIME x`, where the keyword must be followed by a space. The result
// is the literal text prefixed with the keyword and a colon, for example
// "DATE:2020-01-01". The bool result reports whether a keyword matched.
func (p *expectedParser) tryParseTypedLiteral() (any, bool, error) {
        for _, kind := range [...]string{"DATE", "TIMESTAMP", "TIME", "DATETIME"} {
                if !p.consume(kind + " ") {
                        continue
                }
                // readDateOrTimestampLiteral skips any further whitespace.
                lit, err := p.readDateOrTimestampLiteral()
                if err != nil {
                        return nil, true, err
                }
                return kind + ":" + lit, true, nil
        }
        return nil, false, nil
}

// readQuoted reads up to the closing quote byte, which it consumes but does
// not return; the opening quote must already be consumed. A backslash
// copies the following byte verbatim (so `\n` yields "n", not a newline).
// Reaching the end of input without a closing quote is an error.
func (p *expectedParser) readQuoted(quote byte) (string, error) {
        var out strings.Builder
        for p.pos < len(p.s) {
                ch := p.s[p.pos]
                p.pos++
                switch {
                case ch == '\\' && p.pos < len(p.s):
                        out.WriteByte(p.s[p.pos])
                        p.pos++
                case ch == quote:
                        return out.String(), nil
                default:
                        out.WriteByte(ch)
                }
        }
        return "", fmt.Errorf("unterminated string at %d", p.pos)
}

// readDateOrTimestampLiteral reads the payload of a typed literal: a
// single- or double-quoted string when one follows, otherwise a bare token.
func (p *expectedParser) readDateOrTimestampLiteral() (string, error) {
        p.skipSpace()
        switch quote := p.peek(); quote {
        case '"', '\'':
                p.pos++
                return p.readQuoted(quote)
        default:
                return p.readBareToken(), nil
        }
}

// readBareToken consumes bytes up to the next ',', '}', ']', space, newline
// or tab (or the end of input) and returns them with surrounding whitespace
// trimmed. The delimiter itself is left unread.
func (p *expectedParser) readBareToken() string {
        rest := p.s[p.pos:]
        end := strings.IndexAny(rest, ",}] \n\t")
        if end < 0 {
                end = len(rest)
        }
        p.pos += end
        return strings.TrimSpace(rest[:end])
}

// skipSpace advances the cursor past spaces, tabs, newlines and carriage
// returns.
func (p *expectedParser) skipSpace() {
        for p.pos < len(p.s) {
                if c := p.s[p.pos]; c != ' ' && c != '\t' && c != '\n' && c != '\r' {
                        return
                }
                p.pos++
        }
}

// peek returns the byte at the cursor without consuming it, or 0 when the
// cursor is at the end of input.
func (p *expectedParser) peek() byte {
        if p.pos < len(p.s) {
                return p.s[p.pos]
        }
        return 0
}

// consume skips leading whitespace and then, if the input continues with
// tok, advances past it and returns true. On a mismatch the whitespace
// stays skipped and false is returned.
func (p *expectedParser) consume(tok string) bool {
        p.skipSpace()
        matched := strings.HasPrefix(p.s[p.pos:], tok)
        if matched {
                p.pos += len(tok)
        }
        return matched
}

// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool {
        // Unsigned wrap-around maps everything below '0' to a large value.
        return c-'0' <= 9
}

// ToRunnerRows turns positional expected cells into one map per row keyed
// by the gateway schema's column names. Each cell is passed through
// normalizeExpectedCell. Columns beyond a row's length are left out of its
// map, and cells beyond the column list are ignored.
func ToRunnerRows(cells [][]any, colNames []string) []map[string]any {
        out := make([]map[string]any, len(cells))
        for r, row := range cells {
                // Only positions present in both the row and the schema are mapped.
                width := len(colNames)
                if len(row) < width {
                        width = len(row)
                }
                named := make(map[string]any, len(colNames))
                for i := 0; i < width; i++ {
                        named[colNames[i]] = normalizeExpectedCell(row[i])
                }
                out[r] = named
        }
        return out
}

// normalizeExpectedCell converts a parsed cell into the form used for
// comparison. For strings it strips a leading "b:", "DATE:", "TIMESTAMP:",
// "TIME:" or "DATETIME:" tag, maps "nan" to "NaN", keeps "inf" and "-inf"
// unchanged, and converts anything strconv.ParseFloat accepts into a
// float64. Nested cells are rendered with fmt.Sprint. Every other value is
// returned unchanged.
//
// NOTE(review): the parser does not distinguish quoted strings from bare
// tokens, so a quoted string that looks numeric also becomes a float64.
func normalizeExpectedCell(v any) any {
        switch cell := v.(type) {
        case []any:
                // Nested struct rendered as JSON-ish for STRUCT columns until
                // the lane grows struct-aware corpus cases.
                return fmt.Sprint(cell)
        case string:
                for _, tag := range [...]string{"b:", "DATE:", "TIMESTAMP:", "TIME:", "DATETIME:"} {
                        if rest, tagged := strings.CutPrefix(cell, tag); tagged {
                                return rest
                        }
                }
                switch cell {
                case corpusValNaN:
                        return "NaN"
                case corpusValInf, corpusValNInf:
                        return cell
                }
                if num, err := strconv.ParseFloat(cell, 64); err == nil {
                        return num
                }
                return cell
        }
        return v
}

package googlesqlcorpus

import (
        "encoding/json"
        "fmt"
        "os"
        "path/filepath"
        "slices"
)

// Manifest pins the passing subset and triage buckets for the lane. It is
// stored on disk as JSON (see LoadManifest and SaveManifest).
type Manifest struct {
        // Pinned lists case IDs in "file::name" form that must PASS.
        // Membership is an exact string match against CaseID.
        Pinned []string `json:"pinned"`

        // Triage records first-run bucket assignments keyed by case ID.
        Triage map[string]TriageEntry `json:"triage,omitempty"`

        // UnsupportedFeatures skips any case declaring one of these
        // LanguageFeature tokens (without the FEATURE_ prefix). ShouldRun
        // compares them verbatim with a case's RequiredFeatures.
        UnsupportedFeatures []string `json:"unsupported_features,omitempty"`
}

// TriageEntry is one case's triage classification.
type TriageEntry struct {
        // Bucket is the triage bucket label, e.g. BucketPinnedPass.
        Bucket string `json:"bucket"`
        // Message is optional free text; omitted from JSON when empty.
        // NOTE(review): presumably the failure message for non-passing
        // buckets — confirm against the code that fills it.
        Message string `json:"message,omitempty"`
}

// CaseID returns the identifier for a test case in "file::name" form, where
// file is the base name of tc.File. Unnamed cases use "line_<N>" built from
// tc.Line in place of the name.
func CaseID(tc TestCase) string {
        file := filepath.Base(tc.File)
        if tc.Name != "" {
                return file + "::" + tc.Name
        }
        return fmt.Sprintf("%s::line_%d", file, tc.Line)
}

// LoadManifest reads and decodes the JSON manifest at path. Read errors are
// returned as-is; decode errors are wrapped with "decode manifest".
func LoadManifest(path string) (*Manifest, error) {
        raw, err := os.ReadFile(path) //nolint:gosec // manifest path is CLI-controlled
        if err != nil {
                return nil, err
        }
        manifest := new(Manifest)
        if err := json.Unmarshal(raw, manifest); err != nil {
                return nil, fmt.Errorf("decode manifest: %w", err)
        }
        return manifest, nil
}

// SaveManifest writes the manifest as indented JSON with a trailing
// newline. The data is first written to "<path>.tmp" and then renamed over
// path, so readers never observe a partially written manifest. If the write
// or the rename fails, the temporary file is removed (best effort) and the
// original error is returned.
func SaveManifest(path string, m *Manifest) error {
        b, err := json.MarshalIndent(m, "", "  ")
        if err != nil {
                return fmt.Errorf("encode manifest: %w", err)
        }
        tmp := path + ".tmp"
        if err := os.WriteFile(tmp, append(b, '\n'), 0o600); err != nil { //nolint:gosec // conformance artifact
                // A partial temp file may exist; do not leave it behind.
                _ = os.Remove(tmp)
                return err
        }
        if err := os.Rename(tmp, path); err != nil {
                _ = os.Remove(tmp)
                return err
        }
        return nil
}

// ShouldRun reports whether tc should be executed. It returns false with a
// reason when gatePinned is set and the case ID is not in Pinned, or when
// the case requires a feature listed in UnsupportedFeatures. The pinned
// check takes precedence.
func (m *Manifest) ShouldRun(tc TestCase, gatePinned bool) (bool, string) {
        if gatePinned {
                if pinned := slices.Contains(m.Pinned, CaseID(tc)); !pinned {
                        return false, "not in pinned manifest"
                }
        }
        for i := range tc.RequiredFeatures {
                feature := tc.RequiredFeatures[i]
                if !slices.Contains(m.UnsupportedFeatures, feature) {
                        continue
                }
                return false, "required feature out of scope: " + feature
        }
        return true, ""
}

// isPinned reports whether the case's ID appears in the Pinned list.
func (m *Manifest) isPinned(tc TestCase) bool {
        return slices.Index(m.Pinned, CaseID(tc)) >= 0
}

package googlesqlcorpus

import (
        "fmt"
        "strings"
)

// TestFile is one vendored GoogleSQL compliance .test file.
type TestFile struct {
        // Path is the path given to ParseFile.
        Path string
        // Defaults is the most recent file-level [default ...] state seen
        // while parsing.
        Defaults FileDefaults
        // Cases are the parsed cases in file order.
        Cases []TestCase
}

// FileDefaults captures file-level directives such as
// [default required_features=...].
type FileDefaults struct {
        // RequiredFeatures is prepended to the required features of every
        // case parsed after the directive (see parseCase).
        RequiredFeatures []string
}

// TestCase is one statement/expected-result pair from a .test file.
type TestCase struct {
        // File is the path of the .test file as given to ParseFile.
        File string
        // Name comes from the [name=...] directive; empty when absent.
        Name string
        // RequiredFeatures is the file defaults followed by the case's own
        // [required_features=...] entries, with duplicates removed.
        RequiredFeatures []string
        // PrepareDatabase is set by the [prepare_database] directive.
        PrepareDatabase bool
        // SQL is the trimmed text before the "--" separator.
        SQL string
        // Expected is the parsed result stanza; zero when ExpectError is set.
        Expected ExpectedResult
        // ExpectError is the trimmed text after an "ERROR:" prefix in the
        // expected section; empty for cases that expect rows.
        ExpectError string
        // NOTE(review): splitMetaAndBody counts lines from the start of the
        // case's block, not from the start of the file.
        Line int // 1-based line of the case's first directive
}

// ParseFile splits a byte-identical upstream .test file into cases. Blocks
// are separated by "==" lines. A block may update the file-level defaults,
// which then apply to that block and all later ones. Blocks with no body
// (directives only) produce no case.
func ParseFile(path string, content string) (*TestFile, error) {
        parsed := &TestFile{Path: path}
        var defaults FileDefaults

        for _, raw := range splitTestBlocks(content) {
                block := strings.TrimSpace(raw)
                if block == "" {
                        continue
                }

                meta, body, err := splitMetaAndBody(block)
                if err != nil {
                        return nil, fmt.Errorf("%s: %w", path, err)
                }

                // A [default ...] directive replaces the running defaults.
                if len(meta.Defaults.RequiredFeatures) > 0 {
                        defaults = mergeDefaults(defaults, meta.Defaults)
                        parsed.Defaults = defaults
                }

                if strings.TrimSpace(body) == "" {
                        continue
                }
                testCase, err := parseCase(path, meta, body, defaults)
                if err != nil {
                        return nil, err
                }
                parsed.Cases = append(parsed.Cases, testCase)
        }
        return parsed, nil
}

// splitTestBlocks cuts content into blocks at lines that consist of "=="
// (ignoring surrounding whitespace). Every separator closes a block, even
// an empty one. Empty lines at the very start of a block are dropped. The
// text after the last separator is appended, whitespace-trimmed, only when
// it is non-blank.
func splitTestBlocks(content string) []string {
        var blocks []string
        var pending []string
        for _, line := range strings.Split(content, "\n") {
                if strings.TrimSpace(line) == "==" {
                        blocks = append(blocks, strings.Join(pending, "\n"))
                        pending = pending[:0]
                        continue
                }
                // Empty lines before the block's first content are not kept.
                if len(pending) == 0 && line == "" {
                        continue
                }
                pending = append(pending, line)
        }
        if tail := strings.TrimSpace(strings.Join(pending, "\n")); tail != "" {
                blocks = append(blocks, tail)
        }
        return blocks
}

// blockMeta collects what the directive lines at the top of one block
// declare (see applyDirective).
type blockMeta struct {
        Name             string       // value of [name=...]
        RequiredFeatures []string     // entries of [required_features=...]
        PrepareDatabase  bool         // true when [prepare_database] is present
        Defaults         FileDefaults // set by [default required_features=...]
        Line             int          // block-relative, 1-based; starts at 1
}

// splitMetaAndBody separates the leading `[directive]` lines of a block
// from its body. Directive lines and blank lines are consumed until the
// first other line; from there on every line, including ones that look like
// directives, belongs to the body. The body is returned joined with
// newlines.
//
// meta.Line is the 1-based, block-relative line of the first directive and
// stays 1 when the block has none. Errors from applyDirective are wrapped
// with the block-relative line number.
func splitMetaAndBody(block string) (blockMeta, string, error) {
        lines := strings.Split(block, "\n")
        meta := blockMeta{Line: 1}
        var bodyLines []string
        inBody := false
        // An explicit flag rather than a sentinel value in meta.Line: line 1
        // is a legitimate position for the first directive, so treating 1 as
        // "not yet set" let later directives overwrite it.
        sawDirective := false
        for i, line := range lines {
                trim := strings.TrimSpace(line)
                if !inBody && strings.HasPrefix(trim, "[") && strings.HasSuffix(trim, "]") {
                        if err := applyDirective(trim, &meta); err != nil {
                                return blockMeta{}, "", fmt.Errorf("line %d: %w", i+1, err)
                        }
                        if !sawDirective {
                                meta.Line = i + 1
                                sawDirective = true
                        }
                        continue
                }
                // Blank lines before the body starts are not part of it.
                if trim == "" && !inBody {
                        continue
                }
                inBody = true
                bodyLines = append(bodyLines, line)
        }
        return meta, strings.Join(bodyLines, "\n"), nil
}

// applyDirective records one `[key=value]` or `[key]` directive on meta.
// `[default required_features=...]` sets the file-level defaults; a
// `default` directive without '=' or with any other key is an error.
// Otherwise name, required_features and prepare_database are recorded, and
// every other directive is accepted and ignored.
func applyDirective(directive string, meta *blockMeta) error {
        inner := strings.TrimPrefix(directive, "[")
        inner = strings.TrimSuffix(inner, "]")

        if rest, isDefault := strings.CutPrefix(inner, "default "); isDefault {
                key, val, found := strings.Cut(rest, "=")
                if !found {
                        return fmt.Errorf("invalid default directive %q", directive)
                }
                if strings.TrimSpace(key) != "required_features" {
                        return fmt.Errorf("unsupported default directive %q", key)
                }
                meta.Defaults.RequiredFeatures = splitCSV(val)
                return nil
        }

        // Without '=', Cut yields the whole text as key and an empty value.
        key, val, _ := strings.Cut(inner, "=")
        switch strings.TrimSpace(key) {
        case "name":
                meta.Name = strings.TrimSpace(val)
        case "required_features":
                meta.RequiredFeatures = splitCSV(val)
        case "prepare_database":
                meta.PrepareDatabase = true
        }
        // Everything else is deliberately a no-op. That covers the known
        // load_proto_files, load_proto_names, load_enum_names, parameters,
        // labels and forbidden_features directives as well as unknown ones,
        // so upstream additions do not break the parser.
        return nil
}

// mergeDefaults returns cur with its RequiredFeatures replaced by add's
// when add declares any; otherwise cur is returned unchanged.
func mergeDefaults(cur, add FileDefaults) FileDefaults {
        if len(add.RequiredFeatures) == 0 {
                return cur
        }
        cur.RequiredFeatures = add.RequiredFeatures
        return cur
}

// parseCase builds a TestCase from a block body. The body is split at the
// first "--" line (falling back to the first "--" anywhere) into SQL and
// the expected section. An expected section starting with "ERROR:" is kept
// as ExpectError; anything else is parsed with ParseExpected. Required
// features are the file defaults followed by the case's own, deduplicated.
func parseCase(path string, meta blockMeta, body string, defaults FileDefaults) (TestCase, error) {
        const lineSep = "\n--\n"
        sep, idx := lineSep, strings.Index(body, lineSep)
        if idx < 0 {
                sep = "--"
                idx = strings.Index(body, sep)
        }
        if idx < 0 {
                return TestCase{}, fmt.Errorf("%s case %q: missing -- separator", path, meta.Name)
        }

        sql := strings.TrimSpace(body[:idx])
        if sql == "" {
                return TestCase{}, fmt.Errorf("%s case %q: empty SQL", path, meta.Name)
        }

        tc := TestCase{
                File:            path,
                Name:            meta.Name,
                PrepareDatabase: meta.PrepareDatabase,
                SQL:             sql,
                Line:            meta.Line,
        }

        expectedRaw := strings.TrimSpace(body[idx+len(sep):])
        if msg, isErr := strings.CutPrefix(expectedRaw, "ERROR:"); isErr {
                tc.ExpectError = strings.TrimSpace(msg)
        } else {
                parsed, err := ParseExpected(expectedRaw)
                if err != nil {
                        return TestCase{}, fmt.Errorf("%s case %q: parse expected: %w", path, meta.Name, err)
                }
                tc.Expected = parsed
        }

        features := make([]string, 0, len(defaults.RequiredFeatures)+len(meta.RequiredFeatures))
        features = append(features, defaults.RequiredFeatures...)
        features = append(features, meta.RequiredFeatures...)
        tc.RequiredFeatures = dedupe(features)
        return tc, nil
}

// splitCSV splits s on commas, trims each piece and drops empty ones. It
// returns nil when nothing remains.
func splitCSV(s string) []string {
        var fields []string
        for _, piece := range strings.Split(s, ",") {
                if trimmed := strings.TrimSpace(piece); trimmed != "" {
                        fields = append(fields, trimmed)
                }
        }
        return fields
}

// dedupe returns the elements of in with later duplicates removed, keeping
// first-occurrence order. It returns nil for an empty input.
func dedupe(in []string) []string {
        seen := make(map[string]struct{}, len(in))
        var unique []string
        for _, item := range in {
                if _, dup := seen[item]; !dup {
                        seen[item] = struct{}{}
                        unique = append(unique, item)
                }
        }
        return unique
}

package googlesqlcorpus

import (
        "context"
        "encoding/json"
        "fmt"
        "io"
        "io/fs"
        "os"
        "path/filepath"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/conformance/runner"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// Triage bucket labels stored in Result.Bucket and TriageEntry.Bucket.
const (
        // BucketEngineBug is assigned by runCase on a query RPC error, an
        // undecodable response, or a row mismatch.
        BucketEngineBug = "engine-bug"
        // NOTE(review): presumably for queries that hit a route the emulator
        // has not implemented yet — confirm against classifyFailure.
        BucketNotYetLanded = "not-yet-landed-route"
        // NOTE(review): presumably for cases that need a corpus feature the
        // lane does not support — confirm against its users.
        BucketFeatureOutOfScope = "corpus-feature-out-of-scope"
        // BucketPinnedPass is assigned by runCase to cases that pass.
        BucketPinnedPass = "pinned-pass"
)

// Options configures a corpus run. Zero values are replaced with defaults
// by normalizeOptions at the start of Run.
type Options struct {
        // CorpusDir is the directory walked for .test files; defaults to
        // "conformance/googlesql-corpus/corpus".
        CorpusDir string
        // Manifest supplies pinned cases and triage state; defaults to an
        // empty manifest.
        Manifest *Manifest
        // GatePinned is not read in the code shown here.
        // NOTE(review): presumably forwarded to Manifest.ShouldRun by
        // skipCase — confirm.
        GatePinned bool
        // TriageMode makes runCase record passing cases in Manifest.Triage.
        TriageMode bool
        // Harness is passed through to runner.StartEmulator.
        Harness runner.HarnessOptions
        // Profile names the emulator profile; defaults to runner.ProfileDuckDB.
        Profile string
        // ProjectID is used in the gateway base URL; defaults to
        // "googlesql-corpus".
        ProjectID string
        // DatasetID is the dataset created before cases run; defaults to "ds1".
        DatasetID string
        // Out receives per-case progress lines; defaults to os.Stdout.
        Out io.Writer
        // Err defaults to os.Stderr.
        Err io.Writer
}

// Result is one case outcome.
type Result struct {
        // ID is the CaseID of the case ("file::name").
        ID string `json:"id"`
        // File and Name are copied from the TestCase.
        File string `json:"file"`
        Name string `json:"name"`
        // Status holds a runner status value as a string; baseResult starts
        // every result as failed.
        Status string `json:"status"`
        // Bucket is one of the Bucket* triage labels, when assigned.
        Bucket string `json:"bucket,omitempty"`
        // Message is a short description of the outcome.
        Message string `json:"message,omitempty"`
        // Diff carries the row diff or the raw response body of a failed query.
        Diff string `json:"diff,omitempty"`
        // DurationMs is the case duration in milliseconds.
        DurationMs int64 `json:"duration_ms"`
}

// Report aggregates a corpus invocation.
type Report struct {
        // Summary counts results by status. Total counts every case; a
        // result whose status is none of pass, fail or skip is counted only
        // in Total.
        Summary struct {
                Total   int `json:"total"`
                Passed  int `json:"passed"`
                Failed  int `json:"failed"`
                Skipped int `json:"skipped"`
        } `json:"summary"`
        // Results lists the per-case outcomes in execution order.
        Results []Result `json:"results"`
}

// ExitCode mirrors the fixture runner semantics: 2 for a nil report, 1 when
// at least one case failed, and 0 otherwise.
func (r *Report) ExitCode() int {
        switch {
        case r == nil:
                return 2
        case r.Summary.Failed > 0:
                return 1
        default:
                return 0
        }
}

// LoadCorpusDir walks dir recursively and parses every file whose path ends
// in ".test", returning all cases in walk order. The walk stops at the
// first walk, read or parse error; the cases collected up to that point are
// returned together with the error.
func LoadCorpusDir(dir string) ([]TestCase, error) {
        var cases []TestCase
        visit := func(path string, entry fs.DirEntry, walkErr error) error {
                switch {
                case walkErr != nil:
                        return walkErr
                case entry.IsDir(), !strings.HasSuffix(path, ".test"):
                        return nil
                }
                raw, err := os.ReadFile(path) //nolint:gosec // corpus dir is CLI-controlled
                if err != nil {
                        return err
                }
                parsed, err := ParseFile(path, string(raw))
                if err != nil {
                        return err
                }
                cases = append(cases, parsed.Cases...)
                return nil
        }
        err := filepath.WalkDir(dir, visit)
        return cases, err
}

// normalizeOptions fills unset fields of opts with their defaults in place:
// corpus directory, profile, project and dataset IDs, an empty manifest
// (with a triage map when TriageMode is set), and stdout/stderr writers.
func normalizeOptions(opts *Options) {
        // orDefault assigns value only when the field is still empty.
        orDefault := func(field *string, value string) {
                if *field == "" {
                        *field = value
                }
        }
        orDefault(&opts.CorpusDir, "conformance/googlesql-corpus/corpus")
        orDefault(&opts.Profile, runner.ProfileDuckDB)
        orDefault(&opts.ProjectID, "googlesql-corpus")
        orDefault(&opts.DatasetID, "ds1")

        if opts.Manifest == nil {
                opts.Manifest = &Manifest{}
        }
        // runCase writes into the triage map, so it must exist in triage mode.
        if opts.TriageMode && opts.Manifest.Triage == nil {
                opts.Manifest.Triage = make(map[string]TriageEntry)
        }

        if opts.Out == nil {
                opts.Out = os.Stdout
        }
        if opts.Err == nil {
                opts.Err = os.Stderr
        }
}

// Run executes the corpus against the emulator. It loads every case from
// opts.CorpusDir, starts an emulator for the selected profile, creates the
// dataset, and then runs the cases one by one. Each result is tallied and
// printed to opts.Out as "status id message", followed by the diff when
// there is one. Setup failures are returned as errors; case failures are
// only reflected in the report.
func Run(ctx context.Context, opts Options) (*Report, error) {
        normalizeOptions(&opts)

        cases, err := LoadCorpusDir(opts.CorpusDir)
        if err != nil {
                return nil, err
        }

        profile, known := runner.LookupProfile(opts.Profile)
        if !known {
                return nil, fmt.Errorf("unknown profile %q", opts.Profile)
        }

        env, err := runner.StartEmulator(ctx, opts.Harness, profile)
        if err != nil {
                return nil, fmt.Errorf("start emulator: %w", err)
        }
        defer func() { _ = env.Close() }()

        base := env.BaseURL + "/bigquery/v2/projects/" + opts.ProjectID
        if err := seedDataset(ctx, base, opts.DatasetID); err != nil {
                return nil, fmt.Errorf("seed dataset: %w", err)
        }

        report := new(Report)
        tally := &report.Summary
        for _, tc := range cases {
                res := runCase(ctx, base, opts, tc)
                report.Results = append(report.Results, res)

                tally.Total++
                switch res.Status {
                case string(runner.StatusPass):
                        tally.Passed++
                case string(runner.StatusFail):
                        tally.Failed++
                case string(runner.StatusSkip):
                        tally.Skipped++
                }

                _, _ = fmt.Fprintf(opts.Out, "%s %s %s\n", res.Status, res.ID, res.Message)
                if res.Diff == "" {
                        continue
                }
                _, _ = fmt.Fprintln(opts.Out, res.Diff)
        }
        return report, nil
}

// seedDataset creates the dataset through the gateway's datasets endpoint
// under base. The project ID in the request body is derived from base with
// projectIDFromBase. HTTP 409 is treated as success, so an already existing
// dataset is not an error; any other non-2xx status is returned as an error
// that includes the response body.
func seedDataset(ctx context.Context, base, dataset string) error {
        type datasetReference struct {
                ProjectID string `json:"projectId"`
                DatasetID string `json:"datasetId"`
        }
        // Marshal instead of formatting the JSON by hand so that IDs
        // containing quotes or backslashes are escaped correctly.
        body, err := json.Marshal(struct {
                DatasetReference datasetReference `json:"datasetReference"`
                Location         string           `json:"location"`
        }{
                DatasetReference: datasetReference{
                        ProjectID: projectIDFromBase(base),
                        DatasetID: dataset,
                },
                Location: "US",
        })
        if err != nil {
                return fmt.Errorf("encode datasets.insert body: %w", err)
        }

        status, respBody, err := runner.DoRequest(ctx, base+"/datasets", body)
        if err != nil {
                return err
        }
        if status == 409 {
                return nil
        }
        if status < 200 || status >= 300 {
                return fmt.Errorf("datasets.insert -> %d: %s", status, string(respBody))
        }
        return nil
}

// runCase executes a single test case against the gateway rooted at base
// and returns its Result with DurationMs filled in by finish.
//
// Flow:
//   - skipCase decides whether the case is skipped; if so its Result is
//     returned as-is.
//   - The SQL is sent through runner.QueryViaGateway. A transport error or
//     an undecodable body is bucketed as BucketEngineBug.
//   - A non-2xx status is bucketed by classifyFailure. The response body is
//     included in the text handed to classifyFailure: the synthetic
//     "query failed HTTP N" message alone can never contain the
//     "unimplemented" / "not implemented" markers that function looks for.
//   - Otherwise the rows are compared with runner.CompareRows; a non-empty
//     diff is a BucketEngineBug failure, an empty diff is a pass.
//
// In triage mode a pass is also recorded in opts.Manifest.Triage.
// NOTE(review): that write assumes the Triage map is non-nil -- confirm at
// the call site.
func runCase(ctx context.Context, base string, opts Options, tc TestCase) Result {
        started := time.Now()
        res := baseResult(tc)
        if skip, ok := skipCase(tc, opts); ok {
                return finish(skip, started)
        }

        status, body, err := runner.QueryViaGateway(ctx, base, tc.SQL)
        if err != nil {
                res.Message = "query rpc: " + err.Error()
                res.Bucket = BucketEngineBug
                return finish(res, started)
        }
        if status < 200 || status >= 300 {
                res.Message = fmt.Sprintf("query failed HTTP %d", status)
                res.Diff = string(body)
                // Classify on the error body too; the status line by itself
                // carries no "unimplemented" hint.
                res.Bucket = classifyFailure(tc, res.Message+"\n"+res.Diff)
                return finish(res, started)
        }

        var run bqtypes.QueryResponse
        if err := json.Unmarshal(body, &run); err != nil {
                res.Message = "decode response: " + err.Error()
                res.Bucket = BucketEngineBug
                return finish(res, started)
        }

        cols := schemaColumns(run.Schema)
        exp := runner.Expectation{
                Match: chooseMatch(tc.Expected.Ordered),
                Rows:  ToRunnerRows(tc.Expected.Rows, cols),
        }
        if diff := runner.CompareRows(exp, run.Schema, run.Rows); diff != "" {
                res.Message = "row mismatch"
                res.Diff = diff
                res.Bucket = BucketEngineBug
                return finish(res, started)
        }

        res.Status = string(runner.StatusPass)
        res.Bucket = BucketPinnedPass
        if opts.TriageMode {
                opts.Manifest.Triage[res.ID] = TriageEntry{Bucket: BucketPinnedPass}
        }
        return finish(res, started)
}

// baseResult builds the Result skeleton for tc: the identity fields are
// copied from the case and Status starts out as fail, so callers only need
// to overwrite it on a pass or a skip.
func baseResult(tc TestCase) Result {
        var res Result
        res.ID = CaseID(tc)
        res.File = tc.File
        res.Name = tc.Name
        res.Status = string(runner.StatusFail)
        return res
}

// skipCase decides whether tc should be skipped instead of executed.
//
// It returns (result, true) with Status set to skip when:
//   - the case needs prepare_database seeding (BucketNotYetLanded),
//   - the case expects an error (BucketFeatureOutOfScope), or
//   - opts.Manifest.ShouldRun rejects it (BucketFeatureOutOfScope, with the
//     manifest's reason as the message).
//
// Otherwise it returns a zero Result and false.
func skipCase(tc TestCase, opts Options) (Result, bool) {
        res := baseResult(tc)
        skipped := func(bucket, message string) (Result, bool) {
                res.Status = string(runner.StatusSkip)
                res.Bucket = bucket
                res.Message = message
                return res, true
        }

        if tc.PrepareDatabase {
                return skipped(BucketNotYetLanded, "prepare_database seeding not yet implemented")
        }
        if tc.ExpectError != "" {
                return skipped(BucketFeatureOutOfScope, "error-expectation cases deferred in starter lane")
        }
        // The manifest is only consulted once the structural skips are ruled out.
        if ok, why := opts.Manifest.ShouldRun(tc, opts.GatePinned); !ok {
                return skipped(BucketFeatureOutOfScope, why)
        }
        return Result{}, false
}

// chooseMatch maps a case's "ordered" flag onto the runner's match mode:
// ordered comparison when true, unordered comparison otherwise.
func chooseMatch(ordered bool) runner.MatchMode {
        switch ordered {
        case true:
                return runner.MatchOrdered
        default:
                return runner.MatchUnordered
        }
}

// classifyFailure picks the triage bucket for a failed case.
//
// A message mentioning "unimplemented" or "not implemented" (in any letter
// case) yields BucketNotYetLanded. Failing that, a case with a required
// feature whose name contains one of the out-of-scope markers yields
// BucketFeatureOutOfScope. Everything else is BucketEngineBug.
func classifyFailure(tc TestCase, msg string) string {
        folded := strings.ToLower(msg)
        for _, hint := range []string{"unimplemented", "not implemented"} {
                if strings.Contains(folded, hint) {
                        return BucketNotYetLanded
                }
        }

        // Feature markers are matched case-sensitively, as substrings.
        outOfScope := []string{"PROTO", "JSON", "GRAPH", "PIPE", "MATCH_RECOGNIZE"}
        for _, feature := range tc.RequiredFeatures {
                for _, marker := range outOfScope {
                        if strings.Contains(feature, marker) {
                                return BucketFeatureOutOfScope
                        }
                }
        }
        return BucketEngineBug
}

// finish stamps r with the wall-clock time elapsed since started, in
// milliseconds, and returns the updated copy.
func finish(r Result, started time.Time) Result {
        elapsed := time.Since(started)
        r.DurationMs = elapsed.Milliseconds()
        return r
}

// schemaColumns lists the field names of schema in declaration order.
// A nil schema yields a nil slice.
func schemaColumns(schema *bqtypes.TableSchema) []string {
        if schema == nil {
                return nil
        }
        names := make([]string, 0, len(schema.Fields))
        for _, field := range schema.Fields {
                names = append(names, field.Name)
        }
        return names
}

// projectIDFromBase extracts the path segment that follows the last
// "/projects/" marker in base. It returns "" when the marker is absent.
func projectIDFromBase(base string) string {
        const marker = "/projects/"
        at := strings.LastIndex(base, marker)
        if at == -1 {
                return ""
        }
        // Cut hands back the whole remainder when there is no further "/".
        id, _, _ := strings.Cut(base[at+len(marker):], "/")
        return id
}

package runner

import (
        "encoding/json"
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "gopkg.in/yaml.v3"
)

// ErrBaselineUpdateForbidden is the sentinel returned when
// --update-baselines targets a hand-authored fixture that must not be
// bootstrapped from emulator output. refuseBaselineUpdate returns it
// unwrapped, so callers can match it with errors.Is.
var ErrBaselineUpdateForbidden = errors.New(
        "update-baselines refused: protected fixture (see .cursor/rules/conformance-core-usage.mdc)")

// BaselineUpdateForbidden reports whether --update-baselines must refuse to
// rewrite this fixture. A fixture is protected when it is flagged
// VerifiedProduction or when its slash-normalized path contains a
// "/core_usage/" directory segment.
func (fx *Fixture) BaselineUpdateForbidden() bool {
        return fx.VerifiedProduction ||
                strings.Contains(filepath.ToSlash(fx.Path), "/core_usage/")
}

// refuseBaselineUpdate turns the fixture's protection status into an
// error: ErrBaselineUpdateForbidden for protected fixtures, nil otherwise.
func refuseBaselineUpdate(fx *Fixture) error {
        if !fx.BaselineUpdateForbidden() {
                return nil
        }
        return ErrBaselineUpdateForbidden
}

// rewriteFixtureRows decodes body as a gateway QueryResponse, converts its
// rows into the fixture's `expected.rows` form and writes the fixture back
// to disk via writeFixture. Protected fixtures are refused up front.
//
// Each wire row becomes a map keyed by column name (positionalName
// supplies `col<i>` for cells that have no schema entry), with cell values
// passed through baselineCellValue. The whole fixture is re-marshalled
// rather than patched in place.
//
// The fixture's existing Match mode is carried over: rewriting a baseline
// changes values, not the comparison mode.
// NOTE(review): the replacement Expectation sets only Match and Rows, so
// any other expectation fields are reset -- confirm that is intended.
func rewriteFixtureRows(fx *Fixture, body []byte) error {
        if err := refuseBaselineUpdate(fx); err != nil {
                return err
        }

        var resp bqtypes.QueryResponse
        if err := json.Unmarshal(body, &resp); err != nil {
                return fmt.Errorf("decode QueryResponse for baseline: %w", err)
        }

        names := schemaColumns(resp.Schema)
        captured := make([]map[string]any, 0, len(resp.Rows))
        for _, wireRow := range resp.Rows {
                cells := make(map[string]any, len(wireRow.F))
                for pos, cell := range wireRow.F {
                        cells[positionalName(names, pos)] = baselineCellValue(cell.V)
                }
                captured = append(captured, cells)
        }

        mode := fx.Expected.Match
        fx.Expected = Expectation{Match: mode, Rows: captured}
        return writeFixture(fx)
}

// rewriteFixtureError pins an observed error response into the fixture's
// `expected.error` block and writes the fixture back to disk. Protected
// fixtures are refused up front.
//
// The recorded code is the HTTP status passed in. The recorded message is
// the envelope's top-level `error.message`, or the first
// `error.errors[].message` when the top-level one is empty.
func rewriteFixtureError(fx *Fixture, status int, body []byte) error {
        if err := refuseBaselineUpdate(fx); err != nil {
                return err
        }

        var envelope struct {
                Error struct {
                        Message string `json:"message"`
                        Errors  []struct {
                                Message string `json:"message"`
                        } `json:"errors"`
                } `json:"error"`
        }
        // Decode errors are ignored on purpose: an undecodable body simply
        // leaves the captured message empty.
        _ = json.Unmarshal(body, &envelope)

        captured := envelope.Error.Message
        if nested := envelope.Error.Errors; captured == "" && len(nested) > 0 {
                captured = nested[0].Message
        }

        pinned := &ExpectedError{Code: status, MessageContains: captured}
        fx.Expected = Expectation{Error: pinned}
        return writeFixture(fx)
}

// baselineCellValue converts a decoded wire cell into the value written to
// the fixture's `expected.rows` block. nil stays nil, strings stay strings,
// and any other value (for example nested objects or arrays) is handed to
// the YAML encoder untouched.
func baselineCellValue(v any) any {
        switch cell := v.(type) {
        case nil:
                return nil
        case string:
                return cell
        default:
                return cell
        }
}

// writeFixture serializes fx to YAML and replaces the file at fx.Path using
// the write-temp-then-rename pattern: the bytes go to fx.Path+".tmp" first
// and are then renamed over the destination.
//
// The temp file is removed if either the write or the rename fails, so a
// failed update leaves no stale or partial ".tmp" file next to the fixture.
//
// NOTE(review): the temp file is created with mode 0o600, which becomes the
// fixture's mode after the rename -- confirm that is intended.
func writeFixture(fx *Fixture) error {
        data, err := yaml.Marshal(fx)
        if err != nil {
                return fmt.Errorf("marshal fixture: %w", err)
        }
        tmp := fx.Path + ".tmp"
        if err := os.WriteFile(tmp, data, 0o600); err != nil {
                // WriteFile may have created or partially written the file.
                _ = os.Remove(tmp)
                return fmt.Errorf("write tmp %s: %w", tmp, err)
        }
        if err := os.Rename(tmp, fx.Path); err != nil {
                _ = os.Remove(tmp)
                return fmt.Errorf("rename %s -> %s: %w", tmp, fx.Path, err)
        }
        return nil
}

package runner

import (
        "fmt"
        "sort"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// rowDiff compares the gateway's response with the fixture expectation
// according to the fixture's match mode. An empty result means PASS.
//
// Mode handling (an empty mode is treated as MatchOrdered):
//
//   - MatchSchemaOnly: only the schema is checked, via schemaDiff.
//   - MatchOrdered: expected row i is compared with actual row i, via
//     orderedRowDiff.
//   - MatchUnordered: both sides are compared as multisets, via
//     unorderedRowDiff.
//
// For every mode other than MatchSchemaOnly, schemaPreflight runs first and
// its diff, if any, is returned instead of the row comparison.
func rowDiff(exp Expectation, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
        effective := exp.Match
        if effective == "" {
                effective = MatchOrdered
        }

        if effective == MatchSchemaOnly {
                return schemaDiff(exp, schema)
        }
        if problem := schemaPreflight(exp, schema); problem != "" {
                return problem
        }
        if effective == MatchOrdered {
                return orderedRowDiff(exp.Rows, schema, actualRows)
        }
        if effective == MatchUnordered {
                return unorderedRowDiff(exp.Rows, schema, actualRows)
        }
        // Loader validates mode, so this is unreachable at run time.
        return fmt.Sprintf("internal: unknown match mode %q", effective)
}

// schemaPreflight applies the optional `expected.schema:` assertion before
// rows are compared. With no declared schema it returns "" without
// inspecting the response; otherwise it returns diffSchemaList's typed
// comparison against the gateway schema.
func schemaPreflight(exp Expectation, actual *bqtypes.TableSchema) string {
        if len(exp.Schema) > 0 {
                return diffSchemaList(exp.Schema, actual, true)
        }
        return ""
}

// schemaDiff implements schema_only mode.
//
// An explicit `expected.schema:` is compared with types enforced. Without
// one, the keys of `expected.rows[0]` serve as a names-only column
// template, which lets a fixture pin "these columns come back" without
// spelling out their types. If neither is present a diagnostic string is
// returned.
func schemaDiff(exp Expectation, actual *bqtypes.TableSchema) string {
        switch {
        case len(exp.Schema) > 0:
                return diffSchemaList(exp.Schema, actual, true)
        case len(exp.Rows) == 0:
                // Loader rejects this combo, so this is only a safety net.
                return "schema_only: nothing to compare against (no schema:, no rows:)"
        }

        template := exp.Rows[0]
        names := make([]string, 0, len(template))
        for name := range template {
                names = append(names, name)
        }
        // Map iteration order is random; sort for a deterministic column list.
        sort.Strings(names)

        columns := make([]ExpectedColumn, len(names))
        for i, name := range names {
                columns[i] = ExpectedColumn{Name: name}
        }
        return diffSchemaList(columns, actual, false)
}

// diffSchemaList compares the expected columns with the gateway's schema
// and returns "" on match or a rendered diff on mismatch.
//
// With checkTypes=true the comparison is positional: column i must match
// field i by name (case-insensitive) and, where the fixture declares them,
// by Type (after alias normalization) and Mode (case-insensitive).
//
// With checkTypes=false only the set of column names is compared, ignoring
// order and letter case. Both sides are sorted here under the same
// case-folded key, so the result does not depend on the caller pre-sorting
// expected, nor on byte-order sorting disagreeing with the
// case-insensitive comparison (for example {"B","a"} against {"A","b"}).
// The caller's slice is not modified.
//
// A nil or empty actual schema, or a column-count difference, is always a
// mismatch.
func diffSchemaList(expected []ExpectedColumn, actual *bqtypes.TableSchema, checkTypes bool) string {
        if actual == nil || len(actual.Fields) == 0 {
                return fmt.Sprintf(
                        "schema mismatch: expected %d columns, gateway returned no schema",
                        len(expected))
        }
        if len(expected) != len(actual.Fields) {
                return renderSchemaDiff(expected, actual)
        }

        if !checkTypes {
                // Sort copies of the names on a lower-cased key so that both
                // sides use the ordering implied by the EqualFold comparison.
                foldSort := func(names []string) {
                        sort.Slice(names, func(i, j int) bool {
                                return strings.ToLower(names[i]) < strings.ToLower(names[j])
                        })
                }
                wantNames := make([]string, 0, len(expected))
                for _, e := range expected {
                        wantNames = append(wantNames, e.Name)
                }
                gotNames := make([]string, 0, len(actual.Fields))
                for _, f := range actual.Fields {
                        gotNames = append(gotNames, f.Name)
                }
                foldSort(wantNames)
                foldSort(gotNames)
                for i := range wantNames {
                        if !strings.EqualFold(wantNames[i], gotNames[i]) {
                                return renderSchemaDiff(expected, actual)
                        }
                }
                return ""
        }

        for i, e := range expected {
                a := actual.Fields[i]
                if !strings.EqualFold(e.Name, a.Name) {
                        return renderSchemaDiff(expected, actual)
                }
                // Type and Mode are only enforced when the fixture states them.
                if e.Type != "" && !schemaTypesEqual(e.Type, a.Type) {
                        return renderSchemaDiff(expected, actual)
                }
                if e.Mode != "" && !strings.EqualFold(e.Mode, a.Mode) {
                        return renderSchemaDiff(expected, actual)
                }
        }
        return ""
}

// schemaTypesEqual reports whether a fixture-declared type and a gateway
// schema type name the same SQL type once both have been passed through
// normalizeSchemaType.
func schemaTypesEqual(expected, actual string) bool {
        want, got := normalizeSchemaType(expected), normalizeSchemaType(actual)
        return want == got
}

func normalizeSchemaType(t string) string {
        switch strings.ToUpper(strings.TrimSpace(t)) {
        case bqTypeINT64, bqTypeIntegerAlias:
                return bqTypeINT64
        case bqTypeFLOAT64, bqTypeFloatAlias:
                return bqTypeFLOAT64
        case bqTypeBool, bqTypeBooleanAlias:
                return bqTypeBool
        default:
                return strings.ToUpper(strings.TrimSpace(t))
        }
}

// renderSchemaDiff lists the expected columns followed by the actual
// schema, one column per line, so the offending name, type or mode can be
// spotted by eye. Columns print as `name`, `name:TYPE` or `name:TYPE:MODE`
// depending on which attributes are present; types and modes are
// upper-cased.
func renderSchemaDiff(expected []ExpectedColumn, actual *bqtypes.TableSchema) string {
        // column renders "  name:TYPE" with an optional ":MODE" suffix.
        column := func(name, typ, mode string) string {
                line := "  " + name + ":" + strings.ToUpper(typ)
                if mode != "" {
                        line += ":" + strings.ToUpper(mode)
                }
                return line + "\n"
        }

        var out strings.Builder
        out.WriteString("schema mismatch\nexpected:\n")
        for _, want := range expected {
                if want.Type == "" && want.Mode == "" {
                        // Names-only expectation: print the bare name.
                        out.WriteString("  " + want.Name + "\n")
                } else {
                        out.WriteString(column(want.Name, want.Type, want.Mode))
                }
        }

        out.WriteString("actual:\n")
        if actual == nil || len(actual.Fields) == 0 {
                out.WriteString("  (no schema)\n")
                return out.String()
        }
        for _, got := range actual.Fields {
                out.WriteString(column(got.Name, got.Type, got.Mode))
        }
        return out.String()
}

// orderedRowDiff implements ordered matching: expected row i must match
// actual row i under rowMatchesTyped, and both sides must have the same
// number of rows. It returns "" on success and otherwise a listing of both
// sides rendered in canonical form.
func orderedRowDiff(expected []map[string]any, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
        cols := schemaColumns(schema)
        types := schemaTypes(schema)

        // Rows are only compared pairwise when the counts agree; the loop
        // stops at the first mismatching pair.
        allMatch := len(expected) == len(actualRows)
        for i := 0; allMatch && i < len(expected); i++ {
                allMatch = rowMatchesTyped(expected[i], actualRows[i], cols, types, schema)
        }
        if allMatch {
                return ""
        }

        wantLines := renderExpectedRows(expected, cols, types)
        gotLines := renderActualRows(actualRows, cols, types)
        return unifiedDiff(wantLines, gotLines)
}

// unorderedRowDiff compares expected and actual rows as multisets. Every
// row on both sides is rendered to a canonical one-line string and counted;
// the sides match when the counts agree for every distinct line.
//
// On mismatch the returned diff lists both multisets plus the "missing"
// lines (under-represented on the actual side) and the "extra" lines
// (over-represented on the actual side), all sorted for stable output.
//
// Value tolerance in this mode is whatever the canonical cell rendering
// provides, since rows are compared as strings.
func unorderedRowDiff(expected []map[string]any, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
        cols := schemaColumns(schema)
        types := schemaTypes(schema)

        wantCounts, wantLines := groupExpected(expected, cols, types)
        gotCounts, gotLines := groupActual(actualRows, cols, types)

        if !multisetsEqual(wantCounts, gotCounts) {
                missing, extra := diffMultiset(wantCounts, gotCounts)
                for _, list := range [][]string{missing, extra, wantLines, gotLines} {
                        sort.Strings(list)
                }
                return renderUnorderedDiff(wantLines, gotLines, missing, extra)
        }
        return ""
}

// groupExpected renders every expected row to its canonical line and
// returns two views of the result: a line→count multiset, and the lines in
// their original row order (used for the "expected (multiset)" stanza).
func groupExpected(expected []map[string]any, cols, types []string) (map[string]int, []string) {
        counts := make(map[string]int, len(expected))
        rendered := make([]string, len(expected))
        for i := range expected {
                rendered[i] = canonicalExpectedRow(expected[i], cols, types)
                counts[rendered[i]]++
        }
        return counts, rendered
}

// groupActual is the actual-side twin of groupExpected: it renders each
// engine-emitted row to its canonical line and returns the line→count
// multiset together with the lines in row order.
func groupActual(actual []bqtypes.Row, cols, types []string) (map[string]int, []string) {
        counts := make(map[string]int, len(actual))
        rendered := make([]string, len(actual))
        for i := range actual {
                rendered[i] = canonicalActualRow(actual[i], cols, types)
                counts[rendered[i]]++
        }
        return counts, rendered
}

// multisetsEqual reports whether two line→count maps describe the same
// multiset: they must have the same number of keys, and every key of a must
// be present in b with the same count.
//
// Presence is checked explicitly. Reading a missing key yields 0, so
// without that check a zero-count entry such as {"x": 0} would compare
// equal to a map holding a different key.
func multisetsEqual(a, b map[string]int) bool {
        if len(a) != len(b) {
                return false
        }
        for line, want := range a {
                got, present := b[line]
                if !present || got != want {
                        return false
                }
        }
        return true
}

// diffMultiset computes the per-line count differences between two
// multisets. A line appears in missing once for every occurrence that exp
// has beyond act, and in extra once for every occurrence that act has
// beyond exp. Neither slice is sorted; callers sort for stable output.
func diffMultiset(exp, act map[string]int) (missing, extra []string) {
        for line, want := range exp {
                for shortfall := want - act[line]; shortfall > 0; shortfall-- {
                        missing = append(missing, line)
                }
        }
        for line, got := range act {
                for surplus := got - exp[line]; surplus > 0; surplus-- {
                        extra = append(extra, line)
                }
        }
        return missing, extra
}

// renderUnorderedDiff builds the user-facing text for an unordered
// mismatch. The "expected (multiset)" and "actual (multiset)" stanzas are
// always printed. The "missing" and "extra" stanzas are printed only when
// they have entries, so a mismatch with nothing to list there shows no
// empty headers.
func renderUnorderedDiff(expLines, actLines, missing, extra []string) string {
        var out strings.Builder
        out.WriteString("unordered row mismatch\nexpected (multiset):\n")
        writeRowStanza(&out, expLines)
        out.WriteString("actual (multiset):\n")
        writeRowStanza(&out, actLines)

        // optional prints a header and its indented lines, or nothing at all
        // when there are no lines.
        optional := func(header string, lines []string) {
                if len(lines) == 0 {
                        return
                }
                out.WriteString(header)
                for _, line := range lines {
                        out.WriteString("  " + line + "\n")
                }
        }
        optional("missing (expected but not in actual):\n", missing)
        optional("extra (actual but not in expected):\n", extra)
        return out.String()
}

// writeRowStanza appends one side of the unordered diff to b: each line is
// indented by two spaces and newline-terminated. An empty slice is written
// as the explicit "(no rows)" marker so the section never disappears
// silently.
func writeRowStanza(b *strings.Builder, lines []string) {
        if len(lines) > 0 {
                for i := range lines {
                        b.WriteString("  " + lines[i] + "\n")
                }
                return
        }
        b.WriteString("  (no rows)\n")
}

// rowMatchesTyped is the per-row comparator for ordered mode. It walks the
// schema columns positionally and reports whether `expected` and `actual`
// agree:
//
//   - A column pinned in `expected` is compared with the actual cell via
//     cellsEqual, using the column's type and mode from the schema. An
//     actual row that is too short contributes nil for that position.
//   - A column absent from `expected` matches only when the actual cell is
//     nil as well.
//   - Keys in `expected` that are not schema columns, and actual rows with
//     more cells than the schema has columns, are mismatches.
func rowMatchesTyped(
        expected map[string]any,
        actual bqtypes.Row,
        cols []string,
        types []string,
        schema *bqtypes.TableSchema,
) bool {
        for pos := range cols {
                name := cols[pos]

                var got any
                if pos < len(actual.F) {
                        got = actual.F[pos].V
                }

                want, pinned := expected[name]
                if !pinned {
                        // An unpinned column is tolerated only as NULL-vs-missing.
                        if got != nil {
                                return false
                        }
                        continue
                }

                var typ, mode string
                if pos < len(types) {
                        typ = types[pos]
                }
                if schema != nil && pos < len(schema.Fields) {
                        mode = schema.Fields[pos].Mode
                }
                if !cellsEqual(want, got, typ, mode) {
                        return false
                }
        }

        // A fixture must not pin a column the engine never returned;
        // otherwise the comparison would silently pass.
        for key := range expected {
                if !containsString(cols, key) {
                        return false
                }
        }

        // Cells beyond the declared schema are a mismatch too.
        return len(actual.F) <= len(cols)
}

// schemaColumns collects the schema's field names, preserving declaration
// order. It returns nil for a nil schema.
func schemaColumns(schema *bqtypes.TableSchema) []string {
        if schema == nil {
                return nil
        }
        names := make([]string, 0, len(schema.Fields))
        for idx := range schema.Fields {
                names = append(names, schema.Fields[idx].Name)
        }
        return names
}

// schemaTypes collects the schema's field types, preserving declaration
// order. It returns nil for a nil schema, so positional lookups must guard
// with a length check.
func schemaTypes(schema *bqtypes.TableSchema) []string {
        if schema == nil {
                return nil
        }
        kinds := make([]string, 0, len(schema.Fields))
        for idx := range schema.Fields {
                kinds = append(kinds, schema.Fields[idx].Type)
        }
        return kinds
}

// positionalName resolves the column name for cell position i. Positions
// past the end of cols (including any position when cols is empty) get the
// synthetic name `col<i>`.
func positionalName(cols []string, i int) string {
        if i >= len(cols) {
                return fmt.Sprintf("col%d", i)
        }
        return cols[i]
}

// canonicalExpectedRow renders an expected row as a single
// `{name=value, ...}` line so that unordered bucketing can compare rows as
// strings.
//
// Schema columns come first, in schema order. A column the row does not pin
// prints as `name=<missing>`; the others are rendered by canonicalCell with
// the column's type. Keys the schema does not know follow in sorted order,
// rendered with an empty type, so that stray columns show up in the diff.
func canonicalExpectedRow(r map[string]any, cols []string, types []string) string {
        parts := make([]string, 0, len(cols)+len(r))
        inSchema := make(map[string]bool, len(cols))
        for pos, name := range cols {
                inSchema[name] = true
                val, pinned := r[name]
                if !pinned {
                        parts = append(parts, name+"=<missing>")
                        continue
                }
                typ := ""
                if pos < len(types) {
                        typ = types[pos]
                }
                parts = append(parts, fmt.Sprintf("%s=%s", name, canonicalCell(val, typ)))
        }

        var stray []string
        for key := range r {
                if !inSchema[key] {
                        stray = append(stray, key)
                }
        }
        // Map iteration order is random; sort so the line is deterministic.
        sort.Strings(stray)
        for _, key := range stray {
                parts = append(parts, fmt.Sprintf("%s=%s", key, canonicalCell(r[key], "")))
        }
        return "{" + strings.Join(parts, ", ") + "}"
}

// canonicalActualRow renders one wire-format row as a `{name=value, ...}`
// line, the same shape canonicalExpectedRow produces. Cells are named
// positionally (positionalName) and rendered by canonicalCell with the
// column's type where the schema supplies one.
func canonicalActualRow(r bqtypes.Row, cols []string, types []string) string {
        parts := make([]string, len(r.F))
        for pos := range r.F {
                typ := ""
                if pos < len(types) {
                        typ = types[pos]
                }
                parts[pos] = fmt.Sprintf("%s=%s",
                        positionalName(cols, pos), canonicalCell(r.F[pos].V, typ))
        }
        return "{" + strings.Join(parts, ", ") + "}"
}

// renderExpectedRows formats the expected side of the ordered-mode diff:
// one `row <i>: {...}` line per row, using the canonical row rendering.
func renderExpectedRows(rows []map[string]any, cols []string, types []string) []string {
        lines := make([]string, len(rows))
        for idx := range rows {
                lines[idx] = fmt.Sprintf("row %d: %s", idx, canonicalExpectedRow(rows[idx], cols, types))
        }
        return lines
}

// renderActualRows formats the actual side of the ordered-mode diff: one
// `row <i>: {...}` line per wire row, using the canonical row rendering.
func renderActualRows(rows []bqtypes.Row, cols []string, types []string) []string {
        lines := make([]string, len(rows))
        for idx := range rows {
                lines[idx] = fmt.Sprintf("row %d: %s", idx, canonicalActualRow(rows[idx], cols, types))
        }
        return lines
}

// unifiedDiff renders the ordered-mode mismatch report: an "expected:"
// section followed by an "actual:" section, each listing its lines indented
// by two spaces, or "(no rows)" when the side is empty. It is a plain
// listing of both sides, not a hunk-based diff.
func unifiedDiff(expected, actual []string) string {
        sections := []struct {
                title string
                rows  []string
        }{
                {"expected:\n", expected},
                {"actual:\n", actual},
        }

        var out strings.Builder
        for _, section := range sections {
                out.WriteString(section.title)
                if len(section.rows) == 0 {
                        out.WriteString("  (no rows)\n")
                        continue
                }
                for _, row := range section.rows {
                        out.WriteString("  " + row + "\n")
                }
        }
        return out.String()
}

package runner

import (
        "encoding/json"
        "fmt"
        "slices"
        "strings"
)

// CompareError is the exported entry point for error-expectation checks.
// It delegates to errorDiff and returns "" when the response with the given
// HTTP status and body satisfies expected, or a description of the mismatch
// otherwise.
func CompareError(expected ExpectedError, status int, body []byte) string {
        mismatch := errorDiff(expected, status, body)
        return mismatch
}

// errorDiff checks an error response against an `expected.error` block and
// returns "" on match or a human-readable explanation on mismatch.
//
// A non-zero expected.Code must equal the HTTP status. A non-empty
// expected.MessageContains must be a substring of the envelope's top-level
// `error.message`, or of the first `error.errors[].message` when the
// top-level message is empty. Mismatch messages include a truncated copy of
// the body.
func errorDiff(expected ExpectedError, status int, body []byte) string {
        var envelope struct {
                Error struct {
                        Code    int    `json:"code"`
                        Message string `json:"message"`
                        Status  string `json:"status"`
                        Errors  []struct {
                                Reason  string `json:"reason"`
                                Message string `json:"message"`
                        } `json:"errors"`
                } `json:"error"`
        }
        // Decode errors are ignored on purpose: an undecodable body just
        // leaves the message empty for the check below.
        _ = json.Unmarshal(body, &envelope)

        if want := expected.Code; want != 0 && want != status {
                return fmt.Sprintf("error code: expected %d, got %d (body: %s)",
                        want, status, snippet(body))
        }

        needle := expected.MessageContains
        if needle == "" {
                return ""
        }
        observed := envelope.Error.Message
        if nested := envelope.Error.Errors; observed == "" && len(nested) > 0 {
                observed = nested[0].Message
        }
        if strings.Contains(observed, needle) {
                return ""
        }
        return fmt.Sprintf(
                "error message: expected to contain %q, got %q (body: %s)",
                needle, observed, snippet(body))
}

// snippet trims surrounding whitespace from a response body and
// truncates it for inclusion in a diff message; the body can be
// large (the engine emits ZetaSQL parse-error pointers) and we want
// the diff to stay scannable.
//
// Bodies longer than 240 bytes are cut and suffixed with "...". The
// cut never lands in the middle of a multi-byte UTF-8 sequence, so
// the returned snippet does not gain an invalid trailing byte that
// the input did not already have.
func snippet(b []byte) string {
        const limit = 240
        s := strings.TrimSpace(string(b))
        if len(s) <= limit {
                return s
        }
        cut := limit
        // If the byte just past the cut is a continuation byte, the
        // cut would split a rune: back off to that rune's first byte.
        // The back-off is bounded by the longest possible encoding so
        // an already-invalid body cannot shrink the snippet further.
        for back := 0; back < utf8.UTFMax-1 && cut > 0 && !utf8.RuneStart(s[cut]); back++ {
                cut--
        }
        return s[:cut] + "..."
}

// containsString reports whether needle is an element of haystack.
// A nil or empty haystack never contains anything.
func containsString(haystack []string, needle string) bool {
        return slices.Index(haystack, needle) >= 0
}

package runner

import (
        "encoding/json"
        "fmt"
        "math"
        "math/big"
        "strconv"
        "strings"
        "time"
)

// floatRelEpsilon is the relative tolerance floatEqual applies when
// comparing FLOAT64 cells. 1e-9 is loose enough to absorb the
// round-trip IEEE-754 noise around BigQuery's wire encoding: the
// gateway formats float64s with `strconv.FormatFloat(v, 'g', -1, 64)`,
// which is bit-exact, but on the expected side the YAML decoder +
// JSON unmarshal pair passes values through `strconv.ParseFloat`.
const floatRelEpsilon = 1e-9

// cellsEqual decides whether an expected cell and an actual cell
// hold the same value for a column of the given type and mode.
//
// NULL handling comes first: two NULLs are equal, a NULL never
// equals a non-NULL. REPEATED columns (mode compared
// case-insensitively, whitespace trimmed) are compared element-wise
// by repeatedCellsEqual. Everything else dispatches on the
// upper-cased, trimmed fieldType to a per-type comparator.
func cellsEqual(expected, actual any, fieldType, fieldMode string) bool {
        expNull, actNull := isNullExpected(expected), actual == nil
        if expNull || actNull {
                // Equal only when both sides are NULL.
                return expNull && actNull
        }
        if mode := strings.TrimSpace(fieldMode); strings.EqualFold(mode, "REPEATED") {
                return repeatedCellsEqual(expected, actual, fieldType)
        }

        typeName := strings.ToUpper(strings.TrimSpace(fieldType))
        switch typeName {
        case bqTypeINT64, bqTypeIntegerAlias, "NUMERIC", "BIGNUMERIC":
                return numericEqual(expected, actual)
        case bqTypeFLOAT64, bqTypeFloatAlias:
                return floatEqual(expected, actual)
        case bqTypeBool, bqTypeBooleanAlias:
                return boolEqual(expected, actual)
        case "TIMESTAMP", "DATE", "DATETIME", "TIME":
                return timeEqual(expected, actual)
        case bqTypeSTRING, "BYTES":
                // Literal string-form compare; shares the return below.
        }
        // STRING / BYTES, plus unknown or empty types (e.g.
        // STRUCT/REPEATED at the top level, or an absent schema): fall
        // back to the stringy compare so nothing regresses for the
        // existing fixtures.
        return stringForm(expected) == stringForm(actual)
}

// repeatedCellsEqual compares two REPEATED cells element by element.
// Both sides go through normalizeArrayElements first, so a plain
// expected YAML array (`["1","2"]`) lines up with the BigQuery REST
// wire shape (`[{"v":"1"},{"v":"2"}]`). When either side is not an
// array the comparison degrades to the string-form compare. Arrays
// of different length are never equal; elements are compared in
// order with cellsEqual using elemType and an empty mode.
func repeatedCellsEqual(expected, actual any, elemType string) bool {
        want, wantOK := normalizeArrayElements(expected)
        got, gotOK := normalizeArrayElements(actual)
        if !(wantOK && gotOK) {
                return stringForm(expected) == stringForm(actual)
        }
        if len(want) != len(got) {
                return false
        }
        for idx, w := range want {
                if cellsEqual(w, got[idx], elemType, "") {
                        continue
                }
                return false
        }
        return true
}

// normalizeArrayElements flattens an array cell into its bare
// element values. Elements shaped like `{"v": x}` are replaced by x;
// every other element is kept as-is. The second result is false
// (with a nil slice) when v is not a []any.
func normalizeArrayElements(v any) ([]any, bool) {
        elems, isSlice := v.([]any)
        if !isSlice {
                return nil, false
        }
        unwrapped := make([]any, 0, len(elems))
        for _, elem := range elems {
                value := elem
                if cell, isMap := elem.(map[string]any); isMap {
                        if inner, has := cell["v"]; has {
                                value = inner
                        }
                }
                unwrapped = append(unwrapped, value)
        }
        return unwrapped, true
}

// isNullExpected reports whether a YAML-decoded expected value is
// the NULL marker, i.e. an untyped nil (YAML `null`). The literal
// string "NULL" is an ordinary string value and is not treated as
// NULL; a fixture has to quote it explicitly to get that string.
func isNullExpected(v any) bool {
        switch v.(type) {
        case nil:
                return true
        default:
                return false
        }
}

// numericEqual compares two cells as exact rationals (via toRat).
// INT64, NUMERIC, and BIGNUMERIC share this path, so a YAML `1`
// matches the wire `"1"` no matter how either side spelled it. If
// either side cannot be converted to a rational the cells are
// reported as unequal.
func numericEqual(expected, actual any) bool {
        want := toRat(expected)
        got := toRat(actual)
        if want != nil && got != nil {
                return want.Cmp(got) == 0
        }
        return false
}

// toRat best-effort parses a value into math/big.Rat. Integers,
// floats, and strings of either are all accepted; everything else
// returns nil so cellsEqual can flag a type drift instead of a
// silent zero-vs-zero pass.
//
// Non-finite floats (NaN, +Inf, -Inf) have no rational form and
// return nil. Strings are trimmed first; an empty or unparseable
// string returns nil. A *big.Rat input is returned as-is (not
// copied).
func toRat(v any) *big.Rat {
        switch x := v.(type) {
        case nil:
                return nil
        case *big.Rat:
                return x
        case int:
                return new(big.Rat).SetInt64(int64(x))
        case int32:
                return new(big.Rat).SetInt64(int64(x))
        case int64:
                return new(big.Rat).SetInt64(x)
        case uint:
                return new(big.Rat).SetUint64(uint64(x))
        case uint32:
                return new(big.Rat).SetUint64(uint64(x))
        case uint64:
                return new(big.Rat).SetUint64(x)
        case float32:
                // SetFloat64 returns nil for non-finite input and leaves
                // the receiver at zero, so its result (not the receiver)
                // must be what we return.
                return new(big.Rat).SetFloat64(float64(x))
        case float64:
                return new(big.Rat).SetFloat64(x)
        case string:
                s := strings.TrimSpace(x)
                if s == "" {
                        return nil
                }
                if r, ok := new(big.Rat).SetString(s); ok {
                        return r
                }
                return nil
        }
        return nil
}

// floatEqual compares two values as float64 with a relative epsilon
// (floatRelEpsilon).
//
// Either side failing toFloat makes the cells unequal. NaN equals
// only NaN. Exactly equal values (which covers 0 vs 0, 0 vs -0, and
// same-signed infinities) return true before any division happens,
// so the relative-difference division below always has a non-zero
// denominator.
func floatEqual(expected, actual any) bool {
        e, ok := toFloat(expected)
        if !ok {
                return false
        }
        a, ok := toFloat(actual)
        if !ok {
                return false
        }
        if math.IsNaN(e) || math.IsNaN(a) {
                return math.IsNaN(e) && math.IsNaN(a)
        }
        if e == a {
                return true
        }
        // e != a here, so at least one side is non-zero and norm > 0.
        // An infinite operand yields NaN or +Inf for the ratio, both of
        // which fail the <= test, i.e. compare unequal.
        norm := math.Max(math.Abs(e), math.Abs(a))
        return math.Abs(e-a)/norm <= floatRelEpsilon
}

// toFloat parses a value into float64 best-effort. Native floats
// and the integer kinds also accepted by toRat (int, int32, int64,
// uint, uint32, uint64) convert directly. Strings are trimmed and
// parsed with strconv.ParseFloat (BigQuery's wire format encodes
// everything as a string).
//
// The second result is false for nil, unparseable strings, and any
// other type.
func toFloat(v any) (float64, bool) {
        switch x := v.(type) {
        case nil:
                return 0, false
        case float64:
                return x, true
        case float32:
                return float64(x), true
        case int:
                return float64(x), true
        case int32:
                return float64(x), true
        case int64:
                return float64(x), true
        case uint:
                return float64(x), true
        case uint32:
                return float64(x), true
        case uint64:
                return float64(x), true
        case string:
                f, err := strconv.ParseFloat(strings.TrimSpace(x), 64)
                if err != nil {
                        return 0, false
                }
                return f, true
        }
        return 0, false
}

// boolEqual compares two cells as booleans after normalizing each
// side with toBool. This reconciles the real bool the YAML decoder
// produces with the string form found on the wire. A side that
// toBool does not recognize makes the cells unequal.
func boolEqual(expected, actual any) bool {
        want, wantOK := toBool(expected)
        got, gotOK := toBool(actual)
        return wantOK && gotOK && want == got
}

// toBool converts a value to its canonical bool form.
//
// Accepted inputs: a real bool; a string equal (after trimming and
// lower-casing) to "true"/"t"/"1" or "false"/"f"/"0"; an int, int32,
// or int64, where non-zero is true. Any other input yields
// (false, false).
func toBool(v any) (bool, bool) {
        switch x := v.(type) {
        case bool:
                return x, true
        case int:
                return x != 0, true
        case int32:
                return x != 0, true
        case int64:
                return x != 0, true
        case string:
                word := strings.ToLower(strings.TrimSpace(x))
                if word == "true" || word == "t" || word == "1" {
                        return true, true
                }
                if word == "false" || word == "f" || word == "0" {
                        return false, true
                }
        }
        return false, false
}

// timeEqual converts both cells with toTime and reports whether they
// denote the same instant. The accepted spellings are whatever
// toTime recognizes: RFC3339 with optional nanoseconds, the SQL
// `YYYY-MM-DD HH:MM:SS[.fffffffff]` shape, plain dates, and the
// Unix-seconds-as-string form BigQuery uses for TIMESTAMP on the
// wire. A side that cannot be converted makes the cells unequal.
func timeEqual(expected, actual any) bool {
        want, wantOK := toTime(expected)
        got, gotOK := toTime(actual)
        if wantOK && gotOK {
                return want.Equal(got)
        }
        return false
}

// timeFormats lists the time.Parse layouts parseTimestampString
// tries, in slice order, before it falls back to the Unix-seconds
// forms. The first layout that parses wins. The entries cover
// RFC3339, zone-less `T`- and space-separated date-times, a
// space-separated form with a zone abbreviation, date-only, and
// time-only shapes.
var timeFormats = []string{
        time.RFC3339Nano,
        time.RFC3339,
        "2006-01-02T15:04:05.999999999",
        "2006-01-02T15:04:05",
        "2006-01-02 15:04:05.999999999 MST",
        "2006-01-02 15:04:05.999999999",
        "2006-01-02 15:04:05",
        "2006-01-02",
        "15:04:05.999999999",
        "15:04:05",
}

// toTime converts a value into a time.Time.
//
//   - time.Time is returned unchanged.
//   - Strings go through parseTimestampString.
//   - int / int64 are interpreted as Unix seconds (UTC).
//   - float64 is interpreted as Unix seconds with a fractional part.
//     NaN, infinities, and values outside the int64 range are
//     rejected, because converting them to int64 is
//     implementation-defined and would yield an arbitrary instant.
//
// Returns ok=false for nil, for strings no recognized format
// matches, and for any other type.
func toTime(v any) (time.Time, bool) {
        switch x := v.(type) {
        case nil:
                return time.Time{}, false
        case time.Time:
                return x, true
        case string:
                if t, ok := parseTimestampString(x); ok {
                        return t, true
                }
        case int:
                return time.Unix(int64(x), 0).UTC(), true
        case int64:
                return time.Unix(x, 0).UTC(), true
        case float64:
                // The comparisons also exclude ±Inf; NaN needs its own
                // check because every comparison with NaN is false.
                if math.IsNaN(x) || x < math.MinInt64 || x >= math.MaxInt64 {
                        return time.Time{}, false
                }
                sec := int64(x)
                nsec := int64((x - float64(sec)) * 1e9)
                return time.Unix(sec, nsec).UTC(), true
        }
        return time.Time{}, false
}

// parseTimestampString turns a textual timestamp into a UTC
// time.Time. After trimming whitespace it tries, in order:
//
//  1. every layout in timeFormats (first successful parse wins);
//  2. BigQuery's dotted `<sec>.<frac>` TIMESTAMP wire form;
//  3. a bare integer, read as Unix seconds.
//
// An empty string, or one that matches none of the above, yields
// ok=false. Kept separate from toTime so the fallback's natural
// conditional nesting stops tripping nestif.
func parseTimestampString(raw string) (time.Time, bool) {
        trimmed := strings.TrimSpace(raw)
        if trimmed == "" {
                return time.Time{}, false
        }
        for i := range timeFormats {
                parsed, err := time.Parse(timeFormats[i], trimmed)
                if err != nil {
                        continue
                }
                return parsed.UTC(), true
        }
        if parsed, ok := parseUnixSecondsString(trimmed); ok {
                return parsed, true
        }
        sec, err := strconv.ParseInt(trimmed, 10, 64)
        if err != nil {
                return time.Time{}, false
        }
        return time.Unix(sec, 0).UTC(), true
}

// parseUnixSecondsString parses BigQuery's `<sec>.<frac>` TIMESTAMP
// wire encoding without going through float64 (which would drop
// precision past microseconds).
//
// The fraction is truncated (not rounded) to nanosecond precision
// and must consist of decimal digits only. For negative values the
// fraction carries the sign of the whole number, so "-1.5" is 1.5 s
// before the epoch and "-0.5" is half a second before it.
//
// Returns ok=false when the input is not in the dotted form, the
// seconds part is not a base-10 int64, or the fraction contains a
// non-digit.
func parseUnixSecondsString(s string) (time.Time, bool) {
        whole, frac, ok := strings.Cut(s, ".")
        if !ok {
                return time.Time{}, false
        }
        sec, err := strconv.ParseInt(whole, 10, 64)
        if err != nil {
                return time.Time{}, false
        }
        // ParseInt accepts a leading sign, so the fraction must be
        // vetted by hand: "1.-5" or "1.+5" is not a timestamp.
        for i := 0; i < len(frac); i++ {
                if frac[i] < '0' || frac[i] > '9' {
                        return time.Time{}, false
                }
        }
        if len(frac) > 9 {
                frac = frac[:9]
        }
        frac += strings.Repeat("0", 9-len(frac))
        nsec, err := strconv.ParseInt(frac, 10, 64)
        if err != nil {
                return time.Time{}, false
        }
        // The sign lives on the seconds part ("-0.5" parses to sec=0),
        // so read it from the text and apply it to the fraction too.
        // time.Unix accepts nsec outside [0, 999999999].
        if strings.HasPrefix(whole, "-") {
                nsec = -nsec
        }
        return time.Unix(sec, nsec).UTC(), true
}

// stringForm returns the canonical scalar string for the diff
// renderer. STRING/BYTES compare on this literal form, with the
// NULL sentinel kept distinct from the literal string "NULL".
func stringForm(v any) string {
        if v == nil {
                return "<NULL>"
        }
        switch x := v.(type) {
        case string:
                return x
        case bool:
                if x {
                        return boolLiteralTrue
                }
                return boolLiteralFalse
        case int, int32, int64, uint, uint32, uint64:
                return fmt.Sprintf("%d", x)
        case float32, float64:
                return fmt.Sprintf("%v", x)
        default:
                b, err := json.Marshal(v)
                if err != nil {
                        return fmt.Sprintf("%v", v)
                }
                return string(b)
        }
}

// canonicalCell renders one value into its type-normalized text
// form. The result is what both sides of the unordered multiset
// bucket on, so the implementation must be deterministic across
// "1" vs 1, "true" vs true, 1.0 vs "1.0", etc.
//
// nil renders as "<NULL>". For a recognized fieldType the value is
// normalized through the matching converter (toRat, toFloat, toBool,
// toTime); when the converter rejects the value, or the type is not
// recognized, the result is stringForm(v).
func canonicalCell(v any, fieldType string) string {
        if v == nil {
                return "<NULL>"
        }
        switch strings.ToUpper(strings.TrimSpace(fieldType)) {
        case bqTypeINT64, bqTypeIntegerAlias, "NUMERIC", "BIGNUMERIC":
                if r := toRat(v); r != nil {
                        return r.RatString()
                }
        case bqTypeFLOAT64, bqTypeFloatAlias:
                if f, ok := toFloat(v); ok {
                        if f == 0 {
                                // FormatFloat prints negative zero as "-0".
                                // Collapse it onto +0 so 0 and -0 land in the
                                // same bucket, matching the ordered-mode
                                // comparison where they are equal.
                                f = 0
                        }
                        // 12 significant digits absorbs ~1e-12 relative
                        // drift; ordered-mode epsilon still applies for
                        // tighter tolerances.
                        return strconv.FormatFloat(f, 'g', 12, 64)
                }
        case bqTypeBool, bqTypeBooleanAlias:
                if b, ok := toBool(v); ok {
                        if b {
                                return boolLiteralTrue
                        }
                        return boolLiteralFalse
                }
        case "TIMESTAMP", "DATE", "DATETIME", "TIME":
                if t, ok := toTime(v); ok {
                        return t.UTC().Format(time.RFC3339Nano)
                }
        }
        return stringForm(v)
}

// Package runner is the engine half of the conformance harness:
// fixture loading, profile resolution, REST execution, and row /
// error diffing. The CLI entry point lives in
// `conformance/cmd/runner`; tests that exercise the runner against a
// real `emulator_main` subprocess live alongside the CLI behind the
// `//go:build integration` tag.
//
// The package is structured so the parsing and diff logic can be unit
// tested without a running engine: see `runner_test.go`. The harness
// half (`harness.go`) is the only code that touches subprocesses.
package runner

import (
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "sort"
        "strings"

        "gopkg.in/yaml.v3"
)

// Fixture is the in-memory shape of a single YAML conformance file.
//
// See `conformance/README.md` for the worked schema. Every field on
// the wire is optional except `name` and `query`; the loader supplies
// safe defaults for the others so a fixture writer only has to spell
// out the fields they need.
type Fixture struct {
        // Name identifies the fixture in logs and diff output. By
        // convention it matches the YAML filename (without extension)
        // so a divergence between the two is easy to spot.
        Name string `yaml:"name"`

        // Description is free-form prose that gets echoed under the
        // fixture title in `--output text`. Optional.
        Description string `yaml:"description,omitempty"`

        // Profiles is the runtime matrix the fixture applies to. Empty
        // means the default profile set (today: a single local-
        // execution coordinator over DuckDB storage). Unknown profile
        // names are an error at load time so a typo is caught
        // immediately rather than masked as "fixture ran on zero
        // profiles".
        Profiles []string `yaml:"profiles,omitempty"`

        // ProjectID is the BigQuery project the runner POSTs catalog +
        // query work against. Defaults to `proj-conformance-<name>` so
        // fixtures stay isolated even when they share an emulator (via
        // `--connect`).
        ProjectID string `yaml:"project_id,omitempty"`

        // DatasetID is a documentation hint; the runner does not
        // auto-create it. Use a `setup` step with `dataset: <id>` to
        // actually create the dataset.
        DatasetID string `yaml:"dataset_id,omitempty"`

        // Setup runs in order before `Query`. Each step is dispatched
        // on which discriminator field is set (`dataset`, `table`,
        // `sql`); see SetupStep.
        Setup []SetupStep `yaml:"setup,omitempty"`

        // Query is the SQL the runner POSTs to /queries and asserts on.
        // Required. For DML-only fixtures, prefer encoding the assertion
        // as a SELECT after the mutation so the diff stays declarative.
        Query string `yaml:"query"`

        // DefaultDataset is an optional request-level default dataset ID
        // sent as `defaultDataset.datasetId` on jobs.query. Unblocks bare
        // table names (`SELECT * FROM t`) matching production BigQuery
        // client behavior.
        DefaultDataset string `yaml:"default_dataset,omitempty"`

        // VerifiedProduction marks fixtures whose expected rows/errors were
        // hand-authored from production BigQuery semantics. The runner
        // refuses --update-baselines rewrites on such fixtures.
        VerifiedProduction bool `yaml:"verified_production,omitempty"`

        // OptionalDependencies lists host packages (e.g. Python modules) that
        // must be importable for the fixture to run. When any are missing the
        // runner reports SKIP instead of FAIL so host-dependent fixtures stay
        // green on machines without optional deps installed.
        OptionalDependencies []string `yaml:"optional_dependencies,omitempty"`

        // Expected pins either the expected row set or the expected
        // HTTP error envelope. Exactly one of the two must be set.
        Expected Expectation `yaml:"expected"`

        // Path is filled in by Load; not parsed from YAML.
        Path string `yaml:"-"`
}

// SetupStep is one entry in `Fixture.Setup`. A step is identified by
// which discriminator field it sets: `Dataset` for a dataset create,
// `Table` for a table create, `Rows` for a `tabledata.insertAll`
// seed, `SQL` for a query (typically DML or DDL), plus the
// `RowAccessPolicy`, `ColumnGovernance`, and `ConnectionFixture`
// steps declared below. The discriminators are mutually exclusive:
// the loader rejects steps that set more than one or none.
//
// NOTE(review): the exclusivity rule was originally written for the
// first four fields; confirm the loader's validation also covers the
// three later additions.
type SetupStep struct {
        // Dataset is the dataset ID to create. The runner POSTs a
        // minimal `{datasetReference, location:"US"}` body against
        // `/bigquery/v2/projects/<projectId>/datasets`.
        Dataset string `yaml:"dataset,omitempty"`

        // Table is the table to create. The runner POSTs against
        // `/bigquery/v2/projects/<projectId>/datasets/<datasetId>/tables`.
        Table *TableSetup `yaml:"table,omitempty"`

        // Rows seeds a previously created table by POSTing
        // `tabledata.insertAll`. The streaming-insert path is the right
        // tool when the fixture wants to assert the streaming side of
        // the wire (separate from the DML envelope); INSERT VALUES /
        // UPDATE / DELETE now land via the local DML executor
        // (`backend/engine/semantic/dml/`), so fixtures that just
        // want seed data may use either `rows:` or an `sql:` step.
        Rows *RowsSetup `yaml:"rows,omitempty"`

        // SQL is a query the runner POSTs to /queries. Errors from the
        // gateway abort the fixture (counted as runner-internal failure,
        // not a fixture mismatch). Use this for MERGE, CREATE TABLE,
        // DROP TABLE, and the INSERT VALUES / UPDATE / DELETE shapes
        // now landed on the local DML executor (see `Rows` for the
        // streaming-insert alternative).
        SQL string `yaml:"sql,omitempty"`

        // RowAccessPolicy creates a row-access policy via the REST API.
        RowAccessPolicy *RowAccessPolicySetup `yaml:"row_access_policy,omitempty"`

        // ColumnGovernance sets column-level masking metadata via the
        // engine catalog RPC (through the gateway's tables.patch hook).
        ColumnGovernance *ColumnGovernanceSetup `yaml:"column_governance,omitempty"`

        // ConnectionFixture seeds EXTERNAL_QUERY snapshots under data_dir.
        ConnectionFixture *ConnectionFixtureSetup `yaml:"connection_fixture,omitempty"`
}

// RowsSetup describes a `tabledata.insertAll` setup step. Each entry
// in `Rows` is a column-name -> cell-value map, matching the same
// shape as `Expectation.Rows`.
//
// Dataset and Table are decoded from the YAML keys `dataset` and
// `table`; presumably they name the table that receives the rows —
// confirm against the runner's insertAll call.
type RowsSetup struct {
        Dataset string           `yaml:"dataset"`
        Table   string           `yaml:"table"`
        Rows    []map[string]any `yaml:"rows"`
}

// TableSetup describes a table to create via REST. The schema is the
// usual BigQuery TableFieldSchema shape. When External is set the
// runner POSTs an external table (Google Sheets, GCS, ...). When View
// is set the runner POSTs a logical view (a `view.query` body, the
// shape the Python/Java/Go clients send for `create_table(Table)` with
// a view definition) so fixtures can exercise the REST view-creation
// path distinctly from `CREATE VIEW` DDL.
//
// The validate method requires Dataset and ID to be non-empty and at
// least one of Schema, External, or View to be present.
type TableSetup struct {
        Dataset  string              `yaml:"dataset"`
        ID       string              `yaml:"id"`
        Schema   []SchemaColumn      `yaml:"schema,omitempty"`
        External *ExternalTableSetup `yaml:"external,omitempty"`
        View     *ViewTableSetup     `yaml:"view,omitempty"`
}

// ExternalTableSetup is the externalDataConfiguration block for setup.
// TableSetup.validate requires SourceFormat to be non-empty;
// SourceURIs and Autodetect are not checked there.
type ExternalTableSetup struct {
        SourceFormat string   `yaml:"source_format"`
        SourceURIs   []string `yaml:"source_uris"`
        Autodetect   bool     `yaml:"autodetect,omitempty"`
}

// ViewTableSetup is the `view` block for a tables.insert setup step.
// Only the defining query is modeled; the emulator infers the view
// schema from it (matching production BigQuery, which lets clients
// omit the schema on a view insert). TableSetup.validate rejects a
// Query that is empty or whitespace-only.
type ViewTableSetup struct {
        Query string `yaml:"query"`
}

// ConnectionFixtureSetup copies committed connection snapshots into the
// emulator data_dir before EXTERNAL_QUERY runs.
//
// ConnectionID and SourceDir are decoded from the YAML keys
// `connection_id` and `source_dir`.
// NOTE(review): what SourceDir is resolved relative to (fixture
// file vs. working directory) is not visible here — confirm against
// the setup executor.
type ConnectionFixtureSetup struct {
        ConnectionID string `yaml:"connection_id"`
        SourceDir    string `yaml:"source_dir"`
}

// validate checks that a table setup step is complete enough to
// POST. Checks run in a fixed order and the first failure is
// returned:
//
//  1. Dataset is set;
//  2. ID is set;
//  3. at least one of Schema, External, or View is present;
//  4. an External block names its source format;
//  5. a View block carries a non-blank query.
//
// Returns nil when every check passes.
func (t *TableSetup) validate() error {
        switch {
        case t.Dataset == "":
                return errors.New("table.dataset is required")
        case t.ID == "":
                return errors.New("table.id is required")
        case len(t.Schema) == 0 && t.External == nil && t.View == nil:
                return errors.New("table.schema must list at least one column (or set table.external / table.view)")
        case t.External != nil && t.External.SourceFormat == "":
                return errors.New("table.external.source_format is required")
        case t.View != nil && strings.TrimSpace(t.View.Query) == "":
                return errors.New("table.view.query is required")
        }
        return nil
}

// SchemaColumn maps directly to `bqtypes.TableFieldSchema`. We keep
// this as a runner-local struct so the YAML field names (lower-snake)
// stay decoupled from the wire-shape Go struct.
//
// The type is recursive through Fields, presumably to describe the
// sub-columns of STRUCT/RECORD columns — confirm against the code
// that converts it to the wire shape.
type SchemaColumn struct {
        Name        string         `yaml:"name"`
        Type        string         `yaml:"type"`
        Mode        string         `yaml:"mode,omitempty"`
        Description string         `yaml:"description,omitempty"`
        Fields      []SchemaColumn `yaml:"fields,omitempty"`
        PolicyTags  []string       `yaml:"policy_tags,omitempty"`
}

// RowAccessPolicySetup describes a rowAccessPolicies.insert setup step.
// All fields are decoded from lower-snake YAML keys; only Grantees
// is tagged `omitempty`.
// NOTE(review): which of the remaining fields the runner treats as
// required is not visible here — confirm against the loader.
type RowAccessPolicySetup struct {
        Dataset         string   `yaml:"dataset"`
        Table           string   `yaml:"table"`
        PolicyID        string   `yaml:"policy_id"`
        FilterPredicate string   `yaml:"filter_predicate"`
        Grantees        []string `yaml:"grantees,omitempty"`
}

// ColumnGovernanceSetup sets column mask metadata on an existing table.
// Dataset, Table, and Column presumably locate the column and
// MaskKind selects the mask to apply; PolicyTag is the only field
// tagged `omitempty`.
// NOTE(review): the set of accepted MaskKind values is defined
// outside this file — confirm before documenting them here.
type ColumnGovernanceSetup struct {
        Dataset   string `yaml:"dataset"`
        Table     string `yaml:"table"`
        Column    string `yaml:"column"`
        MaskKind  string `yaml:"mask_kind"`
        PolicyTag string `yaml:"policy_tag,omitempty"`
}

// Expectation captures one of two assertion modes: a successful
// result (`Rows`, optionally with `Schema`) or a failure (`Error`).
// Exactly one of `Rows` or `Error` must be set (with the exception of
// `Match==schema_only`, which may set neither and rely on the
// gateway-returned schema alone).
//
// Independently of the row/error assertion, `Route`,
// `RouteAllowlist`, and `RouteStrict` pin which engine disposition
// the query is expected to take.
type Expectation struct {
        // Match controls how Rows are compared against the gateway's
        // response. One of `ordered` (default), `unordered`, or
        // `schema_only`. See `conformance/README.md` for the matching
        // semantics each mode implies.
        Match MatchMode `yaml:"match,omitempty"`

        // Schema is the optional list of expected output columns. The
        // diff engine uses it for two things:
        //
        //   1. `schema_only` mode: required for the schema-vs-schema
        //      assertion (the engine compares this list against the
        //      `QueryResponse.schema` returned by the gateway).
        //   2. `ordered` / `unordered` modes: advisory, used to
        //      double-check the column set the query actually returned
        //      before diffing rows. When omitted, the runner trusts
        //      the gateway-supplied schema.
        Schema []ExpectedColumn `yaml:"schema,omitempty"`

        // Rows is the expected row set for a successful query. Each
        // row is a column-name -> cell-value map. The diff engine
        // normalizes both sides per the column's SQL type from the
        // gateway's `QueryResponse.schema` (so INT64 `1` matches
        // `"1"`, FLOAT64 compares with a relative epsilon, NULL stays
        // distinct from the literal string "NULL", etc.). See
        // `conformance/README.md` for the full type table.
        //
        // Ignored when `Match==schema_only`.
        Rows []map[string]any `yaml:"rows,omitempty"`

        // Error pins the expected error envelope when the fixture
        // intends to verify a failure mode (e.g. invalid SQL).
        Error *ExpectedError `yaml:"error,omitempty"`

        // Route is the canonical lowercase-snake `Disposition` the
        // coordinator's `RouteClassifier` MUST have chosen for this
        // fixture (one of `duckdb_native`, `duckdb_rewrite`,
        // `duckdb_udf`, `semantic_executor`, `control_op`,
        // `local_stub`, `unsupported`; mirrors
        // `backend/engine/disposition.cc::DispositionToString`).
        // Compared against the response's
        // `Job.statistics.query.emulatorRoute` (loopback-only field
        // gated by `gateway/middleware/loopback.go`).
        //
        // For Storage Read / Write fixtures and other RPC families that
        // don't go through `LocalCoordinatorEngine`, leave this empty
        // and use `RouteStrict=false` with an empty `RouteAllowlist`
        // (the runner then skips the route assertion entirely; see
        // the package doc above the field set for the rationale).
        //
        // Ownership: `docs/ENGINE_POLICY.md`.
        Route string `yaml:"route,omitempty"`

        // RouteAllowlist enumerates the route names the runner accepts
        // when `RouteStrict=false`. Useful for shapes that are
        // deliberately flexible between, say, `duckdb_native` and
        // `duckdb_rewrite` because the transpiler's choice is an
        // implementation detail (not a fixture-meaningful behavior).
        //
        // Empty + `RouteStrict=false` AND a non-empty `Route` is the
        // "document-the-intent" pattern used by error-path fixtures:
        // the engine returns before `EmitTrailers` fires so an actual
        // route never reaches the runner, but the fixture writer can
        // still pin `route: unsupported` for the matrix walker. The
        // runner treats actual=="" as a skip in relaxed mode.
        //
        // When `RouteStrict=true` (the default) the runner ignores
        // `RouteAllowlist` and asserts the route equals `Route`
        // exactly. Spelling validation: every entry must be one of the
        // canonical disposition names; unknown entries are a
        // fixture-load error so a typo can't accidentally widen the
        // allowlist.
        RouteAllowlist []string `yaml:"route_allowlist,omitempty"`

        // RouteStrict toggles between exact-match (default) and
        // `RouteAllowlist`-membership comparison. Defaults to `true`
        // when omitted via the `*bool` indirection (a missing key is
        // strict, an explicit `false` opts in to the allowlist mode).
        // The pointer type mirrors how `Fixture` distinguishes a
        // missing optional from an explicit zero value.
        RouteStrict *bool `yaml:"route_strict,omitempty"`
}

// MatchMode is the row-comparison strategy declared by a fixture.
// Default is MatchOrdered. The constant values below are the
// spellings a fixture writes in YAML.
type MatchMode string

// The recognized MatchMode values.
const (
        // MatchOrdered (the default) compares rows pairwise in
        // declaration order. Use `ORDER BY` in the fixture query so the
        // comparison stays deterministic.
        MatchOrdered MatchMode = "ordered"

        // MatchUnordered compares rows as a multiset; the diff engine
        // canonicalizes every row to a type-normalized string and
        // asserts the two multisets are equal. Useful when the query
        // does not declare an ORDER BY and the storage engine returns
        // rows in implementation-defined order (DuckDB, parallel
        // scans, etc.).
        MatchUnordered MatchMode = "unordered"

        // MatchSchemaOnly ignores `Rows` entirely and only validates
        // the column names + types returned by the query. Useful for
        // queries whose row values are non-deterministic (CURRENT_*,
        // generated IDs) and for "dryRun" style smoke checks.
        MatchSchemaOnly MatchMode = "schema_only"
)

// ExpectedColumn is one entry in `Expectation.Schema`. The Type field
// is compared case-insensitively against the gateway's wire-format
// type (`STRING`, `INT64`, `FLOAT64`, etc.) so a fixture pinning
// `INTEGER` will still match a response advertising `INT64`.
//
// Mode is optional in YAML.
// NOTE(review): whether an omitted Mode matches any mode or only
// NULLABLE is decided by the schema diff, which is not visible here.
type ExpectedColumn struct {
        Name string `yaml:"name"`
        Type string `yaml:"type"`
        Mode string `yaml:"mode,omitempty"`
}

// ExpectedError captures the assertion vocabulary for the error path.
// Each field is individually optional and the runner asserts only on
// what is set, but a fixture must set at least one of them.
type ExpectedError struct {
        // Code is the expected HTTP status code, e.g. 400 / 404 / 501.
        // Zero means "do not assert on the status code". A fixture
        // must set at least one of Code or MessageContains.
        Code int `yaml:"code,omitempty"`

        // MessageContains is a substring the runner expects to find
        // in the BigQuery error envelope's top-level `error.message`
        // field (with a fallback to `error.errors[0].message`).
        // Empty means "do not assert on the message".
        MessageContains string `yaml:"message_contains,omitempty"`
}

// defaultProfiles is the set Fixture.Profiles defaults to when the
// fixture omits it; it currently holds the single DuckDB profile.
// Keep alphabetized so iteration order is stable across the matrix
// once more profiles are added.
var defaultProfiles = []string{ProfileDuckDB}

// Load reads the YAML file at path and turns it into a validated
// Fixture. Read failures are wrapped as "read <path>: ..."; parsing
// and validation (required fields, expectation exclusivity, known
// profile names) are delegated to loadBytes, so a non-nil Fixture is
// always ready to use.
func Load(path string) (*Fixture, error) {
        // #nosec G304 -- path is fixture-discovery output controlled by
        // --fixtures flag in a CLI dev tool.
        raw, readErr := os.ReadFile(path)
        if readErr != nil {
                return nil, fmt.Errorf("read %s: %w", path, readErr)
        }
        fixture, err := loadBytes(raw, path)
        return fixture, err
}

// loadBytes decodes in-memory YAML into a Fixture and validates it.
// It exists so tests can bypass the filesystem; Load is the
// production entry point. Unknown YAML keys are rejected. path is
// recorded on the fixture and used to label errors
// ("parse <path>: ..." / "validate <path>: ...").
func loadBytes(data []byte, path string) (*Fixture, error) {
        decoder := yaml.NewDecoder(strings.NewReader(string(data)))
        // Strict mode: a misspelled key fails the load instead of being
        // silently ignored.
        decoder.KnownFields(true)

        fixture := new(Fixture)
        if decodeErr := decoder.Decode(fixture); decodeErr != nil {
                return nil, fmt.Errorf("parse %s: %w", path, decodeErr)
        }
        fixture.Path = path

        if validateErr := fixture.normalize(); validateErr != nil {
                return nil, fmt.Errorf("validate %s: %w", path, validateErr)
        }
        return fixture, nil
}

// LoadDir walks a directory (recursively) and returns every loadable
// `.yaml` / `.yml` fixture, sorted by path. If `pathOrDir` points at
// a regular file it loads just that file.
//
// The walk stops at the first error (mirroring `filepath.Walk`
// semantics) and LoadDir then returns a nil slice together with that
// error, so a single bad fixture stops the run with a clear pointer
// rather than being silently dropped.
func LoadDir(pathOrDir string) ([]*Fixture, error) {
        info, err := os.Stat(pathOrDir)
        if err != nil {
                return nil, fmt.Errorf("stat %s: %w", pathOrDir, err)
        }
        if !info.IsDir() {
                f, err := Load(pathOrDir)
                if err != nil {
                        return nil, err
                }
                return []*Fixture{f}, nil
        }
        var fixtures []*Fixture
        walkErr := filepath.Walk(pathOrDir, func(p string, fi os.FileInfo, walkErr error) error {
                if walkErr != nil {
                        return walkErr
                }
                // Skip directories whose basename starts with `_`. Used
                // for `conformance/fixtures/_route_drift_example/` and
                // future quarantine families that should NOT run in
                // `task conformance:run`. Explicitly loading the fixture
                // file with `Load(...)` still works (the runner / matrix
                // walker can opt in by passing the file path directly).
                //
                // The walk root itself is never skipped, so pointing
                // LoadDir straight at a quarantine directory opts in.
                // The root is identified by path, not basename: Walk
                // hands the root to the callback exactly as given, and a
                // basename comparison would wrongly exempt a nested
                // directory that merely shares the root's name.
                if fi.IsDir() {
                        if p != pathOrDir && strings.HasPrefix(filepath.Base(p), "_") {
                                return filepath.SkipDir
                        }
                        return nil
                }
                ext := strings.ToLower(filepath.Ext(p))
                if ext != ".yaml" && ext != ".yml" {
                        return nil
                }
                f, err := Load(p)
                if err != nil {
                        return err
                }
                fixtures = append(fixtures, f)
                return nil
        })
        if walkErr != nil {
                return nil, walkErr
        }
        sort.Slice(fixtures, func(i, j int) bool {
                return fixtures[i].Path < fixtures[j].Path
        })
        return fixtures, nil
}

// normalize fills in defaults (project ID derived from the fixture
// name, the default profile set) and then validates the fixture:
// name and query must be non-blank, every profile must be known, the
// expectation must be well-formed, and each setup step must pass its
// own validation. The first problem found is returned.
func (f *Fixture) normalize() error {
        switch {
        case strings.TrimSpace(f.Name) == "":
                return errors.New("name is required")
        case strings.TrimSpace(f.Query) == "":
                return errors.New("query is required")
        }

        if f.ProjectID == "" {
                f.ProjectID = "proj-conformance-" + sanitizeID(f.Name)
        }
        if len(f.Profiles) == 0 {
                // Copy so later edits to this fixture cannot alias the
                // package-level default.
                f.Profiles = append([]string(nil), defaultProfiles...)
        }

        profiles := KnownProfiles()
        recognized := make(map[string]bool, len(profiles))
        for _, prof := range profiles {
                recognized[prof.Name] = true
        }
        for _, name := range f.Profiles {
                if recognized[name] {
                        continue
                }
                return fmt.Errorf("unknown profile %q (known: %s)",
                        name, strings.Join(profileNames(), ", "))
        }

        if err := f.validateExpectation(); err != nil {
                return err
        }
        for idx := range f.Setup {
                if err := f.Setup[idx].validate(); err != nil {
                        return fmt.Errorf("setup[%d]: %w", idx, err)
                }
        }
        return nil
}

// validateExpectation defaults `expected.match` to ordered and then
// checks the expectation block for internal consistency:
//   - match must be one of ordered / unordered / schema_only;
//   - error is mutually exclusive with rows and schema;
//   - schema_only needs schema, rows, or error to assert on;
//   - ordered / unordered need rows or error;
//   - an error block must set code or message_contains;
//   - the route assertion fields must be spelled correctly.
func (f *Fixture) validateExpectation() error {
        if f.Expected.Match == "" {
                f.Expected.Match = MatchOrdered
        }
        mode := f.Expected.Match
        if mode != MatchOrdered && mode != MatchUnordered && mode != MatchSchemaOnly {
                return fmt.Errorf(
                        "expected.match=%q is not one of ordered, unordered, schema_only",
                        f.Expected.Match)
        }

        rowsSet := f.Expected.Rows != nil
        schemaSet := len(f.Expected.Schema) > 0
        errSet := f.Expected.Error != nil

        if errSet && (rowsSet || schemaSet) {
                return errors.New(
                        "expected: error cannot be combined with rows or schema")
        }

        if mode == MatchSchemaOnly {
                // A schema_only fixture asserts on an explicit schema:
                // block or on the column names taken from rows:; with
                // neither (and no error:) there is nothing to check.
                if !errSet && !rowsSet && !schemaSet {
                        return errors.New(
                                "expected: match=schema_only requires schema or rows (column names)")
                }
        } else if !rowsSet && !errSet {
                // ordered / unordered must set rows: or error:.
                return errors.New("expected: must set either rows or error")
        }

        if errSet && f.Expected.Error.Code == 0 && f.Expected.Error.MessageContains == "" {
                return errors.New("expected.error: must set at least one of code or message_contains")
        }
        return f.Expected.validateRoute()
}

// validateRoute rejects misspelled route assertions at load time:
// `expected.route` and every `expected.route_allowlist` entry must be
// a known disposition, and an allowlist may only accompany
// `route_strict=false`. Failing here keeps a typo from silently
// permitting a route the fixture author never intended.
func (e *Expectation) validateRoute() error {
        knownList := func() string {
                return strings.Join(KnownRouteNames(), ", ")
        }

        if route := e.Route; route != "" && !isKnownRouteName(route) {
                return fmt.Errorf(
                        "expected.route=%q is not a known disposition (one of %s)",
                        route, knownList())
        }
        for idx := range e.RouteAllowlist {
                entry := e.RouteAllowlist[idx]
                if isKnownRouteName(entry) {
                        continue
                }
                return fmt.Errorf(
                        "expected.route_allowlist[%d]=%q is not a known disposition (one of %s)",
                        idx, entry, knownList())
        }
        if len(e.RouteAllowlist) > 0 && e.RouteStrictDefault() {
                return errors.New(
                        "expected.route_allowlist must not be set when route_strict=true (use route_strict=false)")
        }
        return nil
}

// RouteStrictDefault resolves the optional `route_strict` field to a
// concrete bool: the explicit value when the fixture set the key,
// true (the safe default) when it was omitted. Shared by the runner
// comparison and the matrix walker so the pointer-vs-default handling
// lives in one place.
func (e *Expectation) RouteStrictDefault() bool {
        if strict := e.RouteStrict; strict != nil {
                return *strict
        }
        return true
}

// validate checks that a setup step selects exactly one action
// (dataset, table, rows, sql, row_access_policy, column_governance,
// or connection_fixture) and that the selected action carries its
// required fields. Per-action field errors are reported before the
// exactly-one check, so a step that sets two actions but leaves one
// incomplete surfaces the missing-field error first.
func (s SetupStep) validate() error {
        // Single source for the exactly-one message so the "none set"
        // and "several set" paths always list the same actions.
        const exactlyOneMsg = "setup step must set exactly one of dataset, table, rows, sql, row_access_policy, column_governance, connection_fixture"

        count := 0
        if s.Dataset != "" {
                count++
        }
        if s.Table != nil {
                count++
                if err := s.Table.validate(); err != nil {
                        return err
                }
        }
        if s.Rows != nil {
                count++
                if s.Rows.Dataset == "" {
                        return errors.New("rows.dataset is required")
                }
                if s.Rows.Table == "" {
                        return errors.New("rows.table is required")
                }
                if len(s.Rows.Rows) == 0 {
                        return errors.New("rows.rows must list at least one row")
                }
        }
        if strings.TrimSpace(s.SQL) != "" {
                count++
        }
        if s.RowAccessPolicy != nil {
                count++
                if s.RowAccessPolicy.Dataset == "" || s.RowAccessPolicy.Table == "" ||
                        s.RowAccessPolicy.PolicyID == "" || s.RowAccessPolicy.FilterPredicate == "" {
                        return errors.New("row_access_policy requires dataset, table, policy_id, filter_predicate")
                }
        }
        if s.ColumnGovernance != nil {
                count++
                if s.ColumnGovernance.Dataset == "" || s.ColumnGovernance.Table == "" ||
                        s.ColumnGovernance.Column == "" || s.ColumnGovernance.MaskKind == "" {
                        return errors.New("column_governance requires dataset, table, column, mask_kind")
                }
        }
        if s.ConnectionFixture != nil {
                count++
                if s.ConnectionFixture.ConnectionID == "" || s.ConnectionFixture.SourceDir == "" {
                        return errors.New("connection_fixture requires connection_id and source_dir")
                }
        }
        if count != 1 {
                return errors.New(exactlyOneMsg)
        }
        return nil
}

// sanitizeID derives a project-ID-friendly slug from a fixture name:
// the input is lowercased and every rune outside [a-z0-9] becomes a
// single `-` (an existing `-` therefore stays as it is). The result
// is used for default project IDs so fixtures stay compatible with
// BigQuery's project-ID grammar even though the emulator does not
// strictly enforce it today.
func sanitizeID(s string) string {
        toSlugRune := func(r rune) rune {
                if ('a' <= r && r <= 'z') || ('0' <= r && r <= '9') {
                        return r
                }
                return '-'
        }
        return strings.Map(toSlugRune, strings.ToLower(s))
}

package runner

import (
        "context"
        "encoding/json"
        "fmt"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// CompareRows runs the typed-cell diff engine shared with the YAML
// fixture lane over actualRows, using exp as the expectation and
// schema to interpret the cells. It returns the engine's diff text;
// an empty string means the rows matched.
func CompareRows(exp Expectation, schema *bqtypes.TableSchema, actualRows []bqtypes.Row) string {
        diff := rowDiff(exp, schema, actualRows)
        return diff
}

// QueryViaGateway submits a GoogleSQL statement to the gateway's
// jobs.query endpoint under baseURL. It returns the HTTP status code,
// the raw response body, and any transport-level error.
func QueryViaGateway(ctx context.Context, baseURL, sql string) (int, []byte, error) {
        status, respBody, err := postQuery(ctx, baseURL, sql)
        return status, respBody, err
}

// SetupSQLViaGateway executes one statement through jobs.query for
// catalog seeding (CREATE TABLE, etc.). Transport errors are returned
// unchanged; any non-2xx response becomes an error carrying the
// status code and a snippet of the response body.
func SetupSQLViaGateway(ctx context.Context, baseURL, sql string) error {
        code, respBody, err := postQuery(ctx, baseURL, sql)
        switch {
        case err != nil:
                return err
        case code >= 200 && code < 300:
                return nil
        default:
                return fmt.Errorf("setup sql -> %d: %s", code, snippet(respBody))
        }
}

// DoRequest is the exported form of the runner's POST helper, made
// available to sub-lanes. It sends body to url and returns the HTTP
// status code, the raw response body, and any error.
func DoRequest(ctx context.Context, url string, body []byte) (int, []byte, error) {
        status, respBody, err := doRequest(ctx, url, body)
        return status, respBody, err
}

// postQuery submits sql to jobs.query without a default dataset.
func postQuery(ctx context.Context, baseURL, sql string) (int, []byte, error) {
        const noDefaultDataset = ""
        return postQueryWithDefaultDataset(ctx, baseURL, sql, noDefaultDataset)
}

// postQueryWithDefaultDataset builds a jobs.query body for sql (with
// defaultDataset attached when non-empty, and no query parameters)
// and POSTs it to baseURL + "/queries". A marshalling failure is
// returned with a zero status and nil body.
func postQueryWithDefaultDataset(ctx context.Context, baseURL, sql, defaultDataset string) (int, []byte, error) {
        payload, marshalErr := MarshalJobsQueryBody(sql, defaultDataset, nil)
        if marshalErr != nil {
                return 0, nil, marshalErr
        }
        endpoint := baseURL + "/queries"
        return doRequest(ctx, endpoint, payload)
}

// MarshalJobsQueryBody encodes a jobs.query request body as JSON.
// Exported for sub-lanes.
//
// The body always carries `query` and `useLegacySql: false`.
// `defaultDataset` is added only when defaultDataset is non-empty,
// and `parameterMode: "NAMED"` plus `queryParameters` only when
// params is non-empty. Encoding failures are wrapped as
// "marshal query: ...".
func MarshalJobsQueryBody(sql, defaultDataset string, params []bqtypes.QueryParameter) ([]byte, error) {
        payload := make(map[string]any, 5)
        payload["query"] = sql
        payload["useLegacySql"] = false

        if defaultDataset != "" {
                payload["defaultDataset"] = map[string]string{"datasetId": defaultDataset}
        }
        if len(params) > 0 {
                payload["parameterMode"] = "NAMED"
                payload["queryParameters"] = params
        }

        encoded, err := json.Marshal(payload)
        if err != nil {
                return nil, fmt.Errorf("marshal query: %w", err)
        }
        return encoded, nil
}

// marshalJobsQueryBody is the package-internal shorthand for
// MarshalJobsQueryBody with no query parameters.
func marshalJobsQueryBody(sql, defaultDataset string) ([]byte, error) {
        encoded, err := MarshalJobsQueryBody(sql, defaultDataset, nil)
        return encoded, err
}

package runner

import (
        "bytes"
        "context"
        "errors"
        "fmt"
        "io"
        "net"
        "net/http"
        "net/http/httptest"
        "os"
        "os/exec"
        "path/filepath"
        "strconv"
        "strings"
        "sync"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway"
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
)

// httpMethodGet is the HTTP GET verb as used by the runner's request
// helpers.
const httpMethodGet = http.MethodGet

// spawnState captures everything needed to restart a spawned emulator with the
// same --data_dir and profile flags. It is populated once when the subprocess
// is first launched.
type spawnState struct {
        // engineBinary is the resolved path of the engine executable.
        engineBinary string
        // engineArgs is a private copy of the full argv used for the
        // first launch (including --host_port and --data_dir).
        engineArgs []string
        // engineAddr is the host:port the engine's gRPC server listens
        // on; a restart dials the same address again.
        engineAddr string
        // profile is the profile the emulator was started with.
        profile Profile
        // harnessOpts is the options value used for the first launch;
        // a restart reuses it to relaunch the binary with the same
        // stdio sinks.
        harnessOpts HarnessOptions
}

// engineReadyTimeout matches the value the production gateway uses;
// keeps the cold-start budget consistent across CI lanes. It bounds
// how long the harness waits for a freshly dialed engine to report
// ready before giving up.
const engineReadyTimeout = 30 * time.Second

// EmulatorEnv is one running emulator the runner can drive: an
// HTTP gateway (in-process) sitting in front of either a subprocess
// engine the runner spawned or an already-running engine the runner
// dialed via `--connect`.
//
// The struct intentionally mirrors `gateway/e2e/catalog_test.go::emulatorEnv`
// so a future plan could fold the two into a shared package. Today
// the e2e harness is in `package e2e` with the `integration` build
// tag, so the runner needs its own copy.
type EmulatorEnv struct {
        // BaseURL is the gateway's HTTP root. Callers concatenate
        // `/bigquery/v2/projects/...` onto it. RestartEngine stands up
        // a new gateway server and overwrites this value, so callers
        // should re-read it after a restart.
        BaseURL string

        // httpServer is the in-process gateway; client is the gRPC
        // connection to the engine that the gateway is built on. Both
        // are released by Close.
        httpServer *httptest.Server
        client     *engine.Client

        // cmd is set when the runner spawned `emulator_main` itself.
        // nil when --connect is in use; Close in that mode only tears
        // down the HTTP server + gRPC channel.
        cmd *exec.Cmd

        // dataDir is the temporary `--data_dir` the harness allocated
        // for this emulator. The teardown path removes it.
        dataDir string

        // spawn is set for subprocess engines so RestartEngine can relaunch
        // against the same data_dir mid-session.
        spawn *spawnState
}

// DataDir reports the scratch --data_dir allocated for a spawned
// emulator. It is empty in --connect mode and for a nil receiver.
func (e *EmulatorEnv) DataDir() string {
        if e != nil {
                return e.dataDir
        }
        return ""
}

// Close shuts down the HTTP gateway, closes the gRPC channel,
// terminates the subprocess (if any), and removes the scratch
// data_dir. It returns the first error from closing the channel or
// removing the directory; subprocess signalling is best-effort.
//
// Safe to call more than once and on a nil receiver: each handle is
// cleared once released, so a repeated call finds nothing left to
// close and cannot surface a spurious "already closed" error.
func (e *EmulatorEnv) Close() error {
        if e == nil {
                return nil
        }
        var firstErr error
        if e.httpServer != nil {
                e.httpServer.Close()
                e.httpServer = nil
        }
        if e.client != nil {
                if err := e.client.Close(); err != nil {
                        firstErr = err
                }
                e.client = nil
        }
        if e.cmd != nil && e.cmd.Process != nil {
                proc := e.cmd.Process
                // Best-effort graceful shutdown: SIGINT first, then KILL
                // after a short budget. The C++ engine registers a SIGINT
                // handler that flushes the storage layer; KILL is the
                // belt-and-suspenders path for a wedged subprocess.
                _ = proc.Signal(os.Interrupt)
                done := make(chan struct{})
                go func() {
                        _, _ = proc.Wait()
                        close(done)
                }()
                select {
                case <-done:
                case <-time.After(5 * time.Second):
                        _ = proc.Kill()
                        <-done
                }
                // The process has been reaped; drop the handle so a
                // second Close does not signal or wait on it again.
                e.cmd = nil
        }
        if e.dataDir != "" {
                // dataDir is kept (not cleared) so DataDir() still
                // reports the path; RemoveAll on a missing path is a
                // no-op, which keeps this step idempotent.
                if err := os.RemoveAll(e.dataDir); err != nil && firstErr == nil {
                        firstErr = err
                }
        }
        return firstErr
}

// HarnessOptions configures how the runner spins up the emulator for
// one fixture x profile execution. Callers either set EngineBinary
// (the runner spawns its own emulator subprocess and tears it down
// after the fixture) or ConnectAddress (the runner dials an
// already-running engine on `host:port`). Setting both is rejected;
// setting neither selects spawn mode with the default binary path.
type HarnessOptions struct {
        // EngineBinary is the path to `emulator_main`. Defaults to
        // `./bin/emulator_main` when empty. Mutually exclusive with
        // ConnectAddress.
        EngineBinary string

        // ConnectAddress is `host:port` for an already-running engine.
        // Empty means the harness spawns its own subprocess. Mutually
        // exclusive with EngineBinary.
        ConnectAddress string

        // EngineStdout / EngineStderr receive the engine subprocess's
        // streams. nil discards them; tests typically pass `os.Stderr`
        // to keep crash output visible. Only used in spawn mode.
        EngineStdout io.Writer
        EngineStderr io.Writer

        // DataDirRoot is the parent directory under which the harness
        // allocates per-emulator `--data_dir` paths for the DuckDB
        // profile. Empty defers to `os.TempDir()`.
        DataDirRoot string
}

// validate checks the EngineBinary / ConnectAddress contract: setting
// both is an error, and setting neither selects spawn mode by filling
// EngineBinary with the default `./bin/emulator_main` path.
func (o *HarnessOptions) validate() error {
        hasBinary := o.EngineBinary != ""
        hasConnect := o.ConnectAddress != ""
        switch {
        case hasBinary && hasConnect:
                return errors.New("HarnessOptions: --engine-binary and --connect are mutually exclusive")
        case !hasBinary && !hasConnect:
                o.EngineBinary = filepath.Join(".", "bin", "emulator_main")
        }
        return nil
}

// StartEmulator brings up an EmulatorEnv for profile p. The caller
// owns the returned env (and its subprocess, if one was spawned) and
// must Close it; the harness installs no global cleanup.
//
// In connect mode (ConnectAddress set) the profile only selects which
// fixtures run: the harness does not push `--engine` / `--storage`
// over the wire, so the connected emulator must already be configured
// for the requested profile (CI wires this).
func StartEmulator(ctx context.Context, opts HarnessOptions, p Profile) (*EmulatorEnv, error) {
        if err := opts.validate(); err != nil {
                return nil, err
        }
        if opts.ConnectAddress == "" {
                return startSpawned(ctx, opts, p)
        }
        return startConnected(ctx, opts)
}

// startConnected dials an already-running engine on the configured
// address and wires an in-process HTTP gateway in front of it. The
// returned env's cmd field is nil; Close only releases the channel
// and the HTTP server.
//
// The readiness wait is capped at engineReadyTimeout (or ctx's own
// deadline, whichever is sooner), matching the spawned path, so a
// wrong or unreachable --connect address fails with an error instead
// of blocking indefinitely under a deadline-free context.
func startConnected(ctx context.Context, opts HarnessOptions) (*EmulatorEnv, error) {
        client, err := engine.Dial(opts.ConnectAddress)
        if err != nil {
                return nil, fmt.Errorf("dial connect=%s: %w", opts.ConnectAddress, err)
        }
        readyCtx, cancel := context.WithTimeout(ctx, engineReadyTimeout)
        defer cancel()
        if err := client.WaitForReady(readyCtx); err != nil {
                // Release the channel; the caller never sees this client.
                _ = client.Close()
                return nil, fmt.Errorf("connected engine not ready at %s: %w",
                        opts.ConnectAddress, err)
        }
        handler := gateway.NewServer(gateway.Options{}, handlers.BuildDependencies(client), client)
        srv := httptest.NewServer(handler)
        return &EmulatorEnv{
                BaseURL:    srv.URL,
                httpServer: srv,
                client:     client,
        }, nil
}

// startSpawned runs a fresh `emulator_main` subprocess configured for
// profile p, waits until the engine reports ready, and fronts it with
// an in-process gateway. The returned env owns the subprocess and its
// scratch data_dir. On failure everything allocated so far (process,
// data_dir) is released before the error is returned.
func startSpawned(ctx context.Context, opts HarnessOptions, p Profile) (*EmulatorEnv, error) {
        if _, statErr := os.Stat(opts.EngineBinary); statErr != nil {
                return nil, fmt.Errorf("engine binary not found at %s: %w "+
                        "(build with `task emulator:build-engine:bazel` or pass "+
                        "--connect HOST:PORT)", opts.EngineBinary, statErr)
        }

        argv, scratchDir, engineAddr, err := prepareSpawnArgs(opts, p)
        if err != nil {
                return nil, err
        }

        proc, err := launchEngine(opts, argv)
        if err != nil {
                // Nothing is running yet; only the scratch dir needs
                // to go.
                if scratchDir != "" {
                        _ = os.RemoveAll(scratchDir)
                }
                return nil, fmt.Errorf("spawn %s: %w", opts.EngineBinary, err)
        }

        // From here on a live process exists, so failures must reap it.
        // The teardown closure is handed pointers to the (still nil)
        // client and server so it can release whichever of them exist
        // by the time it runs; on success ownership moves to the env.
        var (
                grpcClient *engine.Client
                gatewaySrv *httptest.Server
        )
        teardown := newSpawnCleanup(proc, scratchDir, &grpcClient, &gatewaySrv)

        if grpcClient, err = waitForReady(ctx, engineAddr); err != nil {
                teardown()
                return nil, err
        }

        deps := handlers.BuildDependenciesWith(grpcClient, handlers.DepsOptions{DataDir: scratchDir})
        gatewaySrv = httptest.NewServer(gateway.NewServer(gateway.Options{}, deps, grpcClient))

        env := &EmulatorEnv{
                BaseURL:    gatewaySrv.URL,
                httpServer: gatewaySrv,
                client:     grpcClient,
                cmd:        proc,
                dataDir:    scratchDir,
                spawn: &spawnState{
                        engineBinary: opts.EngineBinary,
                        engineArgs:   append([]string(nil), argv...),
                        engineAddr:   engineAddr,
                        profile:      p,
                        harnessOpts:  opts,
                },
        }
        return env, nil
}

// prepareSpawnArgs picks a free loopback port, creates a scratch
// data_dir, and builds the argv for a fresh `emulator_main`:
// `--host_port <addr>`, then the profile's own flags, then
// `--data_dir <dir>`. It returns the argv, the data_dir path, and the
// engine address. Errors are already wrapped for direct return, and
// all other results are zero when err is non-nil.
func prepareSpawnArgs(opts HarnessOptions, p Profile) (args []string, dataDir, addr string, err error) {
        enginePort, portErr := freePort()
        if portErr != nil {
                return nil, "", "", fmt.Errorf("allocate engine port: %w", portErr)
        }
        hostPort := net.JoinHostPort("127.0.0.1", strconv.Itoa(enginePort))

        argv := []string{"--host_port", hostPort}
        argv = append(argv, p.EmulatorMainArgs()...)

        // Every spawn gets its own temp directory: DuckDB storage
        // needs a persistent --data_dir, and sharing one would let
        // concurrent profile runs collide on the same catalog.
        parent := opts.DataDirRoot
        if parent == "" {
                parent = os.TempDir()
        }
        scratch, mkErr := os.MkdirTemp(parent, "bq-conformance-")
        if mkErr != nil {
                return nil, "", "", fmt.Errorf("create data_dir: %w", mkErr)
        }

        argv = append(argv, "--data_dir", scratch)
        return argv, scratch, hostPort, nil
}

// launchEngine starts the configured engine binary with the given
// argv, routing its stdout/stderr to the sinks in opts. It returns
// the started command, or the raw start error.
func launchEngine(opts HarnessOptions, args []string) (*exec.Cmd, error) {
        // #nosec G204 -- emulator binary path is operator-supplied via
        // --engine-binary; runner is a CLI dev tool.
        engineCmd := exec.Command(opts.EngineBinary, args...)
        engineCmd.Stdout, engineCmd.Stderr = opts.EngineStdout, opts.EngineStderr
        if err := engineCmd.Start(); err != nil {
                return nil, err
        }
        return engineCmd, nil
}

// waitForReady dials the engine at addr and blocks until it reports
// ready, giving up after engineReadyTimeout (or earlier if ctx ends).
// On success the caller owns the returned *engine.Client; on failure
// the client is closed here and nil is returned.
func waitForReady(ctx context.Context, addr string) (*engine.Client, error) {
        conn, dialErr := engine.Dial(addr)
        if dialErr != nil {
                return nil, fmt.Errorf("dial %s: %w", addr, dialErr)
        }

        deadlineCtx, stop := context.WithTimeout(ctx, engineReadyTimeout)
        defer stop()

        readyErr := conn.WaitForReady(deadlineCtx)
        if readyErr == nil {
                return conn, nil
        }
        _ = conn.Close()
        return nil, fmt.Errorf("emulator at %s not ready: %w", addr, readyErr)
}

// newSpawnCleanup builds the failure-path teardown for a spawned
// engine. The returned closure runs at most once (sync.Once) and, in
// order: closes the gateway test server and the gRPC client if they
// have been created, stops the subprocess (SIGINT, then KILL after a
// 5s budget), and removes the scratch data_dir.
//
// clientPtr and srvPtr point at the caller's variables rather than
// holding values, so the closure can be created before the client and
// server exist and still see them once they are assigned.
func newSpawnCleanup(cmd *exec.Cmd, dataDir string, clientPtr **engine.Client, srvPtr **httptest.Server) func() {
        stopProcess := func() {
                _ = cmd.Process.Signal(os.Interrupt)
                exited := make(chan struct{})
                go func() {
                        defer close(exited)
                        _, _ = cmd.Process.Wait()
                }()
                select {
                case <-exited:
                case <-time.After(5 * time.Second):
                        _ = cmd.Process.Kill()
                        <-exited
                }
        }

        teardown := func() {
                if *srvPtr != nil {
                        (*srvPtr).Close()
                }
                if *clientPtr != nil {
                        _ = (*clientPtr).Close()
                }
                stopProcess()
                if dataDir != "" {
                        _ = os.RemoveAll(dataDir)
                }
        }

        var guard sync.Once
        return func() { guard.Do(teardown) }
}

// freePort asks the kernel for an unused loopback TCP port by
// listening on 127.0.0.1:0, reading back the assigned port, and
// closing the listener straight away. Another process could grab the
// port before the engine subprocess binds it; that small race is
// accepted.
func freePort() (int, error) {
        listener, err := net.Listen("tcp", "127.0.0.1:0")
        if err != nil {
                return 0, err
        }
        tcpAddr := listener.Addr().(*net.TCPAddr)
        if closeErr := listener.Close(); closeErr != nil {
                return 0, closeErr
        }
        return tcpAddr.Port, nil
}

// DoHTTPRequest performs one HTTP call against the gateway using
// http.DefaultClient. method is trimmed and upper-cased before use. A
// non-empty body is sent with `Content-Type: application/json`; an
// empty body sends no payload and no content-type header.
//
// It returns the response status code and the full response body.
// Failing to build or send the request yields status 0; failing to
// read the response body yields the real status with a nil body.
func DoHTTPRequest(ctx context.Context, method, url string, body []byte) (int, []byte, error) {
        verb := strings.ToUpper(strings.TrimSpace(method))
        hasBody := len(body) > 0

        // Leave the reader as a nil interface when there is nothing to
        // send so the request is built without a body.
        var payload io.Reader
        if hasBody {
                payload = bytes.NewReader(body)
        }

        request, err := http.NewRequestWithContext(ctx, verb, url, payload)
        if err != nil {
                return 0, nil, fmt.Errorf("build request %s %s: %w", verb, url, err)
        }
        if hasBody {
                request.Header.Set("Content-Type", "application/json")
        }

        response, err := http.DefaultClient.Do(request)
        if err != nil {
                return 0, nil, fmt.Errorf("http %s %s: %w", verb, url, err)
        }
        defer func() { _ = response.Body.Close() }()

        data, readErr := io.ReadAll(response.Body)
        if readErr != nil {
                return response.StatusCode, nil, fmt.Errorf("read body from %s %s: %w", verb, url, readErr)
        }
        return response.StatusCode, data, nil
}

// RestartEngine gracefully stops the spawned subprocess and relaunches it with
// the same argv (including --data_dir), then rebuilds the gRPC client and the
// in-process gateway in front of it.
//
// It returns an error -- it is not a no-op -- when the env is nil or was
// created in --connect mode, because there is no subprocess to restart.
//
// The gateway is recreated on a new httptest server, so BaseURL changes;
// callers must re-read it after a successful restart. If the relaunch or the
// readiness wait fails, the env is left without a client or gateway and
// should be Closed.
func (e *EmulatorEnv) RestartEngine(ctx context.Context) error {
        if e == nil || e.spawn == nil {
                return errors.New("restart requires a spawned emulator (not --connect mode)")
        }
        if e.httpServer != nil {
                e.httpServer.Close()
                e.httpServer = nil
        }
        if e.client != nil {
                if err := e.client.Close(); err != nil {
                        return fmt.Errorf("close engine client before restart: %w", err)
                }
                e.client = nil
        }
        if e.cmd != nil && e.cmd.Process != nil {
                // SIGINT first so the engine can flush; KILL only if it
                // has not exited within the 5s budget.
                _ = e.cmd.Process.Signal(os.Interrupt)
                done := make(chan struct{})
                go func() {
                        _, _ = e.cmd.Process.Wait()
                        close(done)
                }()
                select {
                case <-done:
                case <-time.After(5 * time.Second):
                        _ = e.cmd.Process.Kill()
                        <-done
                }
        }
        cmd, err := launchEngine(e.spawn.harnessOpts, e.spawn.engineArgs)
        if err != nil {
                return fmt.Errorf("restart spawn: %w", err)
        }
        e.cmd = cmd
        client, err := waitForReady(ctx, e.spawn.engineAddr)
        if err != nil {
                // e.cmd still references the killed process, so a later
                // Close reaps it.
                _ = cmd.Process.Kill()
                return err
        }
        e.client = client
        // Build the gateway exactly as the initial spawn does, passing
        // the data_dir through DepsOptions, so handlers that rely on it
        // behave the same before and after a restart.
        e.httpServer = httptest.NewServer(
                gateway.NewServer(
                        gateway.Options{},
                        handlers.BuildDependenciesWith(client, handlers.DepsOptions{DataDir: e.dataDir}),
                        client,
                ),
        )
        e.BaseURL = e.httpServer.URL
        return nil
}

// doRequest is the runner's POST-only HTTP helper: it forwards to
// DoHTTPRequest with the method fixed to POST. Callers are expected
// to have a JSON body to send. The status code, response body, and
// error come back exactly as DoHTTPRequest produced them.
func doRequest(ctx context.Context, url string, body []byte) (int, []byte, error) {
        const verb = http.MethodPost
        return DoHTTPRequest(ctx, verb, url, body)
}

// doPatchRequest sends body to url with HTTP PATCH by delegating to
// DoHTTPRequest. Used by setup steps that mutate table metadata (e.g.
// column governance via tables.patch).
func doPatchRequest(ctx context.Context, url string, body []byte) (int, []byte, error) {
        status, payload, err := DoHTTPRequest(ctx, http.MethodPatch, url, body)
        return status, payload, err
}

package runner

import (
        "encoding/json"
        "errors"
        "fmt"
        "os"
        "os/exec"
        "path/filepath"
        "strings"
)

// packagePreflightScript is the Python program missingPythonPackages
// runs with `python -c`. It reads a JSON array of requirement strings
// from stdin, cuts each one at the first version operator it finds
// (==, >=, <=, !=, ~=, <, >) to obtain a module name, probes that name
// with importlib.util.find_spec, and writes {"missing": [...]} to
// stdout, listing the original requirement strings that were not found.
//
// NOTE(review): the stripped requirement is used verbatim as the import
// name, so a distribution whose import name differs from its package
// name would be reported missing -- confirm callers list importable
// module names.
const packagePreflightScript = `
import importlib.util
import json
import sys

def _module_name(pkg):
    for sep in ("==", ">=", "<=", "!=", "~=", "<", ">"):
        if sep in pkg:
            return pkg.split(sep, 1)[0].strip()
    return pkg.strip()

packages = json.load(sys.stdin)
missing = []
for pkg in packages:
    mod = _module_name(pkg)
    if importlib.util.find_spec(mod) is None:
        missing.append(pkg)
json.dump({"missing": missing}, sys.stdout)
`

// optionalDependencySkipReason explains why a fixture should be skipped
// on account of its optional Python dependencies. It returns "" when
// deps is empty or every dependency is importable; otherwise it returns
// either the list of missing packages or, when the interpreter could not
// be resolved or the preflight itself failed, that failure's text.
func optionalDependencySkipReason(deps []string) string {
        if len(deps) == 0 {
                return ""
        }
        interpreter, resolveErr := resolveConformancePython()
        if resolveErr != nil {
                return "optional dependency check: " + resolveErr.Error()
        }
        absent, probeErr := missingPythonPackages(interpreter, deps)
        switch {
        case probeErr != nil:
                return "optional dependency check: " + probeErr.Error()
        case len(absent) == 0:
                return ""
        }
        return "optional dependencies not available: " + strings.Join(absent, ", ")
}

// resolveConformancePython picks the Python interpreter used for the
// optional-dependency preflight. Candidates are tried in this order:
//
//  1. $BIGQUERY_EMULATOR_PYTHON, which must exist on disk (a stat
//     failure is returned as an error rather than falling through);
//  2. $BIGQUERY_EMULATOR_DATA_DIR/python-udf-env/bin/python3, used only
//     if it exists and has at least one execute bit set;
//  3. the first python3 found on PATH.
//
// An error is returned when none of the candidates is usable.
func resolveConformancePython() (string, error) {
        explicit := os.Getenv("BIGQUERY_EMULATOR_PYTHON")
        if explicit != "" {
                _, statErr := os.Stat(explicit)
                if statErr != nil {
                        return "", fmt.Errorf("BIGQUERY_EMULATOR_PYTHON %q: %w", explicit, statErr)
                }
                return explicit, nil
        }

        if root := os.Getenv("BIGQUERY_EMULATOR_DATA_DIR"); root != "" {
                candidate := filepath.Join(root, "python-udf-env", "bin", "python3")
                info, statErr := os.Stat(candidate)
                if statErr == nil && info.Mode()&0o111 != 0 {
                        return candidate, nil
                }
        }

        onPath, lookErr := exec.LookPath("python3")
        if lookErr != nil {
                return "", errors.New("python3 not found on PATH")
        }
        return onPath, nil
}

// missingPythonPackages runs packagePreflightScript under the given
// interpreter, feeding it packages as a JSON array on stdin, and returns
// the "missing" list decoded from the script's JSON stdout. A failure to
// encode the input, run the interpreter, or decode its output is
// returned as an error.
func missingPythonPackages(python string, packages []string) ([]string, error) {
        encoded, err := json.Marshal(packages)
        if err != nil {
                return nil, err
        }

        probe := exec.Command(python, "-c", packagePreflightScript)
        probe.Stdin = strings.NewReader(string(encoded))
        stdout, runErr := probe.Output()
        if runErr != nil {
                return nil, fmt.Errorf("python preflight: %w", runErr)
        }

        var reply struct {
                Missing []string `json:"missing"`
        }
        if decodeErr := json.Unmarshal(stdout, &reply); decodeErr != nil {
                return nil, fmt.Errorf("decode preflight response: %w", decodeErr)
        }
        return reply.Missing, nil
}

package runner

import "sort"

// ProfileDuckDB is the canonical conformance-profile identifier for
// the DuckDB-backed runtime. It is a package-level const so that
// fixtures, the CLI, and the runner all share one spelling; it is also
// the Name of the only entry in the profiles table.
const ProfileDuckDB = "duckdb"

// Profile is one named runtime configuration the runner can drive.
// Today there is only one (`duckdb`) since the emulator ships a
// single local-execution coordinator with DuckDB storage; the type
// is kept around so the fixture / CLI surface stays generic if a
// second profile lands later.
//
// The `EmulatorMainArgs` method supplies the flag list the harness
// passes when spawning `emulator_main`; it does not include
// `--host_port` because the harness picks a free port per spawn.
type Profile struct {
        // Name is the user-facing profile identifier. Fixtures reference
        // this in their `profiles:` field, and the runner echoes it in
        // every result line.
        Name string
}

// EmulatorMainArgs returns the `emulator_main` flag list for this
// profile. The DuckDB profile has no engine / storage selector flags
// (those were removed when the reference-impl + in-memory storage
// backends were deleted), so the result is currently always a nil
// slice. The helper is kept so fixtures and the runner do not have to
// branch on profile name.
func (p Profile) EmulatorMainArgs() []string {
        var args []string
        return args
}

// profiles is the table of known profiles that KnownProfiles,
// LookupProfile, and profileNames read. It currently holds only the
// DuckDB profile.
var profiles = []Profile{
        {
                Name: ProfileDuckDB,
        },
}

// KnownProfiles returns a fresh copy of the profile table, sorted by
// Name so that matrix iteration order is stable. Callers may modify the
// returned slice without affecting the package-level table.
func KnownProfiles() []Profile {
        snapshot := make([]Profile, len(profiles))
        for i := range profiles {
                snapshot[i] = profiles[i]
        }
        sort.Slice(snapshot, func(a, b int) bool {
                return snapshot[a].Name < snapshot[b].Name
        })
        return snapshot
}

// LookupProfile finds the profile whose Name equals name. The second
// result is false, and the first is the zero Profile, when no entry in
// the profile table matches.
func LookupProfile(name string) (Profile, bool) {
        for i := range profiles {
                if profiles[i].Name == name {
                        return profiles[i], true
                }
        }
        return Profile{}, false
}

// profileNames returns the Name of every known profile, sorted
// alphabetically. Used to list the valid choices in error messages.
func profileNames() []string {
        names := make([]string, len(profiles))
        for i, p := range profiles {
                names[i] = p.Name
        }
        sort.Strings(names)
        return names
}

package runner

import (
        "encoding/json"
        "fmt"
        "io"
        "strings"
)

// writeTextResult renders a single fixture x profile result as text:
// a `[STATUS] fixture (profile=..., Nms)` header line, followed by the
// message (if any) and each line of the diff (if any), indented.
// The output is kept terse so a sweep over hundreds of fixtures stays
// scannable. Write errors are deliberately ignored.
func writeTextResult(w io.Writer, r Result) {
        var label string
        switch r.Status {
        case StatusPass:
                label = "PASS"
        case StatusFail:
                label = "FAIL"
        case StatusSkip:
                label = "SKIP"
        default:
                // Unrecognised statuses are shown verbatim.
                label = string(r.Status)
        }
        _, _ = fmt.Fprintf(w, "[%s] %s (profile=%s, %dms)\n",
                label, r.Fixture, r.Profile, r.DurationMs)
        if r.Message != "" {
                _, _ = fmt.Fprintf(w, "       %s\n", r.Message)
        }
        if r.Diff == "" {
                return
        }
        // Trailing newlines are trimmed so the diff does not end with a
        // run of blank indented lines.
        trimmed := strings.TrimRight(r.Diff, "\n")
        for _, diffLine := range strings.Split(trimmed, "\n") {
                _, _ = fmt.Fprintf(w, "       %s\n", diffLine)
        }
}

// writeTextSummary prints the matrix-level rollup: a `---` separator
// line followed by one line of total / passed / failed / skipped
// counts taken from report.Summary. Write errors are deliberately
// ignored.
func writeTextSummary(w io.Writer, report *Report) {
        totals := report.Summary
        _, _ = fmt.Fprintln(w, "---")
        _, _ = fmt.Fprintf(w, "conformance: total=%d passed=%d failed=%d skipped=%d\n",
                totals.Total, totals.Passed, totals.Failed, totals.Skipped)
}

// writeJSONReport writes report to w as JSON indented by two spaces
// and returns any encoding or write error. The schema is documented in
// `conformance/README.md` (the "JSON output shape" section). Plan-41 CI
// pivots on `schema_version`.
func writeJSONReport(w io.Writer, report *Report) error {
        encoder := json.NewEncoder(w)
        encoder.SetIndent("", "  ")
        if err := encoder.Encode(report); err != nil {
                return err
        }
        return nil
}

package runner

import "slices"

// Go-side mirror of the C++ canonical `Disposition` vocabulary
// `backend/engine/disposition.cc::DispositionToString` produces.
// The two sides MUST agree letter-for-letter: a fixture writer
// pinning `expected.route: duckdb_native` only PASSes when the
// engine trails `emulator_route=duckdb_native`. Keep this list
// sorted by `node_dispositions.yaml`'s priority order (low ->
// high) so a reviewer can eyeball the relative weight if it ever
// comes up.
//
// Plan ownership: `docs/ENGINE_POLICY.md` covers both this file and
// the C++ source of truth. A new disposition value lands on BOTH
// sides at once.
//
// We do NOT generate this from the C++ header at build time: the
// 7-entry list churns rarely (each new entry is a multi-plan
// rollout) and the parity check in `tools/check_disposition_parity`
// already catches a C++/YAML/Go drift before it ships.

// Canonical lowercase-snake disposition names. Must mirror
// `Disposition::k*` in `backend/engine/disposition.h`. Each constant
// is also listed, in this same order, in knownRoutes.
const (
        // RouteDuckDBNative lowers to DuckDB SQL whose semantics
        // already match BigQuery exactly.
        RouteDuckDBNative = "duckdb_native"

        // RouteDuckDBRewrite lowers to DuckDB SQL via a deliberate
        // structural rewrite (struct/array shape rewrites, JSON
        // operator mapping, ...). Same executor as duckdb_native.
        RouteDuckDBRewrite = "duckdb_rewrite"

        // RouteDuckDBUDF lowers to DuckDB SQL that calls one of the
        // polyfill UDFs/macros registered at engine startup.
        RouteDuckDBUDF = "duckdb_udf"

        // RouteSemanticExecutor runs on the local row/value semantic
        // executor instead of DuckDB SQL evaluation.
        RouteSemanticExecutor = "semantic_executor"

        // RouteControlOp is the DDL / metadata / catalog op route
        // (CREATE TABLE / DROP TABLE / ALTER / pipe-DDL).
        RouteControlOp = "control_op"

        // RouteLocalStub is the deterministic BigQuery-shaped stub
        // route for specialized features (KEYS.NEW_KEYSET, CREATE
        // MODEL, ...). See `docs/ENGINE_POLICY.md`.
        RouteLocalStub = "local_stub"

        // RouteUnsupported surfaces a BigQuery-shaped `UNIMPLEMENTED`.
        // See `docs/ENGINE_POLICY.md` for the list of unsupported
        // families.
        RouteUnsupported = "unsupported"
)

// knownRoutes pins the closed set of route names in priority order
// (matches the C++ enum declaration in `disposition.h`). Iterating
// this slice gives a stable, reviewable ordering in user-facing error
// messages. It backs both KnownRouteNames and isKnownRouteName, so a
// new Route* constant must be added here as well.
var knownRoutes = []string{
        RouteDuckDBNative,
        RouteDuckDBRewrite,
        RouteDuckDBUDF,
        RouteSemanticExecutor,
        RouteControlOp,
        RouteLocalStub,
        RouteUnsupported,
}

// KnownRouteNames returns a fresh slice holding the canonical
// disposition names in priority order. Validation messages and the
// matrix walker use it so they share one source of truth; callers may
// modify the result without affecting the package-level list.
func KnownRouteNames() []string {
        names := make([]string, 0, len(knownRoutes))
        names = append(names, knownRoutes...)
        return names
}

// isKnownRouteName reports whether `s` exactly matches one of the
// canonical disposition names in knownRoutes.
func isKnownRouteName(s string) bool {
        return slices.Index(knownRoutes, s) >= 0
}

package runner

import (
        "context"
        "encoding/json"
        "fmt"
        "io"
        "os"
        "slices"
        "sort"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// Status is the per-fixture verdict the runner emits. It is a string
// type, so the value appears verbatim in the JSON report.
type Status string

// The verdicts the runner assigns: a cell passes, fails (on a
// mismatch or a runner-internal error during its lifecycle), or is
// skipped (e.g. when an optional dependency is unavailable).
const (
        StatusPass Status = "PASS"
        StatusFail Status = "FAIL"
        StatusSkip Status = "SKIP"
)

// JSONSchemaVersion is the on-the-wire `schema_version` the diff CI
// pivots on. It is stamped into every Report the runner builds and is
// bumped only on a breaking output-shape change.
const JSONSchemaVersion = 1

// The accepted --output renderer names. outputFormatText is the
// default (a human-readable text renderer); outputFormatJSON selects
// the machine-readable report. Hoisted to consts so the default, the
// validator, and the dispatcher all reference one source of truth.
const (
        outputFormatText = "text"
        outputFormatJSON = "json"
)

// Result is one fixture x profile outcome. The JSON tags mirror what
// the diff CI consumes; keep them stable.
//
// Fixture and Path identify the fixture (its name and file path) and
// Profile names the profile it ran under. Status is the verdict.
// DurationMs is the elapsed wall time of the cell in milliseconds.
// Message is a one-line explanation and Diff carries the detailed
// mismatch text; both are omitted from JSON when empty.
type Result struct {
        Fixture    string `json:"fixture"`
        Path       string `json:"path"`
        Profile    string `json:"profile"`
        Status     Status `json:"status"`
        DurationMs int64  `json:"duration_ms"`
        Message    string `json:"message,omitempty"`
        Diff       string `json:"diff,omitempty"`
}

// Summary is the matrix-level aggregate the JSON output starts with.
// Total counts every cell that ran; Passed, Failed, and Skipped count
// the cells that ended with the corresponding Status.
type Summary struct {
        Total   int `json:"total"`
        Passed  int `json:"passed"`
        Failed  int `json:"failed"`
        Skipped int `json:"skipped"`
}

// Report is the top-level JSON payload. `schema_version` is the field
// the diff CI keys off, so a downstream consumer can refuse a report
// it does not understand without parsing the rest. Summary holds the
// aggregate counters and Results holds one entry per cell, in the
// order the cells were run.
type Report struct {
        SchemaVersion int      `json:"schema_version"`
        Summary       Summary  `json:"summary"`
        Results       []Result `json:"results"`
}

// Options bundles the CLI flags the runner needs to do its job. The
// CLI in `conformance/cmd/runner` parses these and hands the Options
// over without further interpretation.
type Options struct {
        // FixturesPath points at the directory (or single file) the
        // runner loads. Empty defaults to "conformance/fixtures".
        FixturesPath string

        // Harness carries the engine-binary / connect / stdio settings.
        Harness HarnessOptions

        // Profiles restricts which profiles the matrix iterates over.
        // Empty means "all known profiles".
        Profiles []string

        // UpdateBaselines overwrites the `expected:` block of every
        // fixture with the actual response. Used to bootstrap new
        // fixtures. A fixture whose baseline was rewritten is reported
        // as PASS regardless of the original expected block; a fixture
        // that expected an error but got a success response, or whose
        // rewrite failed, is still reported as FAIL.
        UpdateBaselines bool

        // Output controls the renderer: "text" (default) or "json".
        Output string

        // Out / Err are the writers the renderer dispatches to.
        // Default: os.Stdout / os.Stderr.
        Out io.Writer
        Err io.Writer
}

// Run executes the conformance matrix once. It defaults and validates
// opts, loads the fixtures, resolves the enabled profiles, runs every
// matching fixture x profile cell, and renders the report (the JSON
// document, or the text summary that follows the streamed per-cell
// lines).
//
// A non-nil error means a runner-internal failure (bad options, fixture
// load failure, no fixtures, unknown profile, JSON write failure); only
// in the JSON-write case is the Report also returned. A fixture FAILing
// is not an error: the Report comes back with Summary.Failed > 0 and a
// nil error, and the CLI derives its exit code via Report.ExitCode.
func Run(ctx context.Context, opts Options) (*Report, error) {
        prepared, err := prepareOptions(opts)
        if err != nil {
                return nil, err
        }

        fixtures, err := LoadDir(prepared.FixturesPath)
        if err != nil {
                return nil, err
        }
        if len(fixtures) == 0 {
                return nil, fmt.Errorf("no fixtures found under %s", prepared.FixturesPath)
        }

        enabled, err := resolveProfiles(prepared.Profiles)
        if err != nil {
                return nil, err
        }

        report := iterateMatrix(ctx, fixtures, enabled, prepared)
        switch prepared.Output {
        case outputFormatJSON:
                if encErr := writeJSONReport(prepared.Out, report); encErr != nil {
                        return report, fmt.Errorf("write json report: %w", encErr)
                }
        default:
                writeTextSummary(prepared.Out, report)
        }

        // The in-place fixture rewrite itself happens per cell inside
        // runOne; here we only remind the operator to review the result.
        if prepared.UpdateBaselines {
                _, _ = io.WriteString(prepared.Err,
                        "runner: --update-baselines overwrote `expected:` blocks; review the diff before committing\n")
        }
        return report, nil
}

// prepareOptions fills in the unset fields of Options (Out, Err,
// Output, FixturesPath) and rejects an Output value that is neither
// "text" nor "json". It works on a copy, so the caller's Options value
// is not modified.
func prepareOptions(opts Options) (Options, error) {
        if opts.Out == nil {
                opts.Out = os.Stdout
        }
        if opts.Err == nil {
                opts.Err = os.Stderr
        }
        if opts.Output == "" {
                opts.Output = outputFormatText
        }
        switch opts.Output {
        case outputFormatText, outputFormatJSON:
                // Recognised renderer.
        default:
                return opts, fmt.Errorf("unknown --output %q (want text or json)",
                        opts.Output)
        }
        if opts.FixturesPath == "" {
                opts.FixturesPath = "conformance/fixtures"
        }
        return opts, nil
}

// iterateMatrix walks the profile x fixture cross product, profile by
// profile. A fixture runs under a profile only if it lists that
// profile's name. Each cell is executed by runOne, appended to the
// report, and tallied into the summary counters; in text mode the
// result is also written to opts.Out as soon as it completes.
func iterateMatrix(ctx context.Context, fixtures []*Fixture, enabled []Profile, opts Options) *Report {
        report := &Report{SchemaVersion: JSONSchemaVersion}
        streamText := opts.Output == outputFormatText
        tally := &report.Summary
        for _, profile := range enabled {
                for _, fixture := range fixtures {
                        if !contains(fixture.Profiles, profile.Name) {
                                continue
                        }
                        outcome := runOne(ctx, fixture, profile, opts)
                        report.Results = append(report.Results, outcome)
                        tally.Total++
                        switch outcome.Status {
                        case StatusPass:
                                tally.Passed++
                        case StatusFail:
                                tally.Failed++
                        case StatusSkip:
                                tally.Skipped++
                        }
                        if streamText {
                                writeTextResult(opts.Out, outcome)
                        }
                }
        }
        return report
}

// ExitCode derives the recommended process exit code from a Report:
// 2 when there is no report at all (nil receiver), 1 when at least one
// cell failed, and 0 otherwise. The CLI calls this directly so the
// runner's exit semantics are unit-testable.
func (r *Report) ExitCode() int {
        switch {
        case r == nil:
                return 2
        case r.Summary.Failed > 0:
                return 1
        default:
                return 0
        }
}

// runOne executes a single fixture x profile cell and returns its
// Result with the elapsed time stamped on. The status starts as FAIL
// and stays FAIL on any mismatch or runner-internal error. Steps:
//
//  1. Skip the cell if an optional dependency is unavailable.
//  2. Start an emulator for the profile; it is closed on return.
//  3. Run the fixture's setup steps against the gateway.
//  4. POST the fixture's query to the gateway.
//  5. Hand the response to runErrorPath (fixture expects an error) or
//     runRowPath (fixture expects rows).
//
// Under `--update-baselines` those two helpers rewrite the fixture in
// place from the captured response instead of diffing, so a fixture
// writer can bootstrap without authoring the expected block by hand.
func runOne(ctx context.Context, fx *Fixture, p Profile, opts Options) Result {
        began := time.Now()
        outcome := Result{
                Status:  StatusFail,
                Fixture: fx.Name,
                Path:    fx.Path,
                Profile: p.Name,
        }
        // stop records msg on the in-progress result and stamps the
        // duration; shared by every early return below.
        stop := func(msg string) Result {
                outcome.Message = msg
                return markDuration(outcome, began)
        }

        if skipReason := optionalDependencySkipReason(fx.OptionalDependencies); skipReason != "" {
                outcome.Status = StatusSkip
                return stop(skipReason)
        }

        emu, err := StartEmulator(ctx, opts.Harness, p)
        if err != nil {
                return stop("start emulator: " + err.Error())
        }
        defer func() {
                _ = emu.Close()
        }()

        projectURL := emu.BaseURL + "/bigquery/v2/projects/" + fx.ProjectID
        if err := RunSetupSteps(ctx, projectURL, emu.dataDir, fx.Setup, fx.DefaultDataset); err != nil {
                return stop(err.Error())
        }

        payload, err := MarshalJobsQueryBody(fx.Query, fx.DefaultDataset, nil)
        if err != nil {
                return stop(err.Error())
        }
        code, respBody, err := doRequest(ctx, projectURL+"/queries", payload)
        if err != nil {
                return stop("query rpc: " + err.Error())
        }

        var verdict Result
        if fx.Expected.Error != nil {
                verdict = runErrorPath(fx, opts, outcome, code, respBody)
        } else {
                verdict = runRowPath(fx, opts, outcome, code, respBody)
        }
        return markDuration(verdict, began)
}

// markDuration returns r with DurationMs set to the wall time elapsed
// since started, in whole milliseconds. It exists so every return path
// of a cell runner can stamp the duration with one call.
func markDuration(r Result, started time.Time) Result {
        elapsed := time.Since(started)
        r.DurationMs = elapsed.Milliseconds()
        return r
}

// runErrorPath drives the error-mode branch of a fixture, i.e. one
// whose fx.Expected.Error is set. It takes the in-progress result
// (status FAIL), the HTTP status, and the response body, and returns
// the finished result:
//
//   - A 2xx response is always a FAIL ("expected error, got success").
//     Under --update-baselines the success rows are still written to the
//     fixture so the writer can flip the assertion mode; if that rewrite
//     fails, the failure is appended to the message instead of being
//     dropped silently.
//   - A non-2xx response under --update-baselines rewrites the fixture's
//     expected error from the response and reports PASS (or FAIL if the
//     rewrite fails).
//   - Otherwise the response is diffed against fx.Expected.Error; PASS
//     only when the diff is empty.
func runErrorPath(fx *Fixture, opts Options, result Result, status int, body []byte) Result {
        if status >= 200 && status < 300 {
                result.Message = "expected error, got success"
                result.Diff = fmt.Sprintf("status: %d\nbody: %s",
                        status, snippet(body))
                if opts.UpdateBaselines {
                        // Record the actual success result as the new
                        // baseline (rows) so the fixture writer can flip
                        // the assertion mode. The cell stays FAIL either
                        // way, but a failed rewrite must be visible.
                        if err := rewriteFixtureRows(fx, body); err != nil {
                                result.Message += "; update-baselines: " + err.Error()
                        }
                }
                return result
        }
        if opts.UpdateBaselines {
                if err := rewriteFixtureError(fx, status, body); err != nil {
                        result.Message = "update-baselines: " + err.Error()
                        return result
                }
                result.Status = StatusPass
                result.Message = "baseline updated"
                return result
        }
        if diff := errorDiff(*fx.Expected.Error, status, body); diff != "" {
                result.Message = "error mismatch"
                result.Diff = diff
                return result
        }
        result.Status = StatusPass
        return result
}

// runRowPath drives the row-mode branch of a fixture. It takes the
// in-progress result (status FAIL), the HTTP status, and the response
// body, and returns the finished result. A non-2xx status or a body
// that does not decode as a QueryResponse is a FAIL. Under
// --update-baselines the fixture is rewritten from the body and the
// cell reports PASS. Otherwise the rows are diffed against fx.Expected
// and then the response's emulator route is checked; PASS requires
// both to match.
func runRowPath(fx *Fixture, opts Options, result Result, status int, body []byte) Result {
        succeeded := status >= 200 && status < 300
        if !succeeded {
                result.Message = fmt.Sprintf("query failed with HTTP %d", status)
                result.Diff = "body: " + snippet(body)
                return result
        }

        var resp bqtypes.QueryResponse
        if decodeErr := json.Unmarshal(body, &resp); decodeErr != nil {
                result.Message = "decode QueryResponse: " + decodeErr.Error()
                result.Diff = "body: " + snippet(body)
                return result
        }

        if opts.UpdateBaselines {
                rewriteErr := rewriteFixtureRows(fx, body)
                if rewriteErr != nil {
                        result.Message = "update-baselines: " + rewriteErr.Error()
                        return result
                }
                result.Status = StatusPass
                result.Message = "baseline updated"
                return result
        }

        if rowsDelta := rowDiff(fx.Expected, resp.Schema, resp.Rows); rowsDelta != "" {
                // Label the mismatch after the comparison mode in use.
                if fx.Expected.Match == MatchSchemaOnly {
                        result.Message = "schema mismatch"
                } else if fx.Expected.Match == MatchUnordered {
                        result.Message = "row multiset mismatch"
                } else {
                        result.Message = "row mismatch"
                }
                result.Diff = rowsDelta
                return result
        }

        // The route is absent when the response carries no query
        // statistics; routeDiff decides whether that is acceptable.
        var route string
        if stats := resp.Statistics; stats != nil && stats.Query != nil {
                route = stats.Query.EmulatorRoute
        }
        if routeDelta := routeDiff(fx.Expected, route); routeDelta != "" {
                result.Message = "route mismatch"
                result.Diff = routeDelta
                return result
        }

        result.Status = StatusPass
        return result
}

// routeDiff checks the response's `emulatorRoute` value (actual)
// against the fixture's route expectation and returns "" on a match or
// a diagnostic naming both the expected and the actual route on a
// mismatch, so a fixture writer can see the drift without re-running
// the engine.
//
// The comparison has three modes:
//
//  1. No assertion: `route` and `route_allowlist` are both empty.
//     Always matches. Used by fixtures that pre-date the route-label
//     machinery and by the deferred Storage Read / Write fixture
//     families that don't go through the coordinator's classifier.
//
//  2. Strict (`route_strict` true, the default): actual must equal
//     `route` exactly, so an empty actual is a mismatch. If only
//     `route_allowlist` is set and `route` is empty, strict mode
//     asserts nothing and matches.
//
//  3. Relaxed (`route_strict` false): actual must be `route` or a
//     member of `route_allowlist`. An empty actual is treated as a
//     skip rather than a mismatch, so error-path fixtures (whose
//     trailer is not emitted because the engine returns before
//     `EmitTrailers` fires) can still pin `route: unsupported` for
//     matrix documentation without breaking the runner.
func routeDiff(expected Expectation, actual string) string {
        pinned, allowlist := expected.Route, expected.RouteAllowlist
        if pinned == "" && len(allowlist) == 0 {
                return ""
        }

        if expected.RouteStrictDefault() {
                if pinned == "" || actual == pinned {
                        return ""
                }
                return fmt.Sprintf(
                        "expected route: %q\nactual route:   %q\n"+
                                "(hint: expected.route_strict defaults to true; set "+
                                "route_strict: false with a route_allowlist if the "+
                                "fixture is genuinely flexible between routes)",
                        pinned, actual)
        }

        // Relaxed mode: an empty actual means no route was reported, which
        // is skipped rather than failed.
        if actual == "" {
                return ""
        }
        // The accepted set is the pinned route (if any) followed by the
        // allowlist; the same ordering is shown in the diagnostic.
        accepted := make([]string, 0, len(allowlist)+1)
        if pinned != "" {
                accepted = append(accepted, pinned)
        }
        accepted = append(accepted, allowlist...)
        if slices.Contains(accepted, actual) {
                return ""
        }
        return fmt.Sprintf(
                "expected route in: [%s]\nactual route:     %q",
                strings.Join(accepted, ", "), actual)
}

// resolveProfiles turns the CLI's --profile values into Profile
// entries sorted by name, giving a stable matrix order. Duplicate names
// are collapsed. An empty input selects every known profile; an
// unrecognised name is an error that lists the valid choices.
func resolveProfiles(names []string) ([]Profile, error) {
        if len(names) == 0 {
                return KnownProfiles(), nil
        }
        visited := make(map[string]struct{}, len(names))
        picked := make([]Profile, 0, len(names))
        for _, name := range names {
                if _, dup := visited[name]; dup {
                        continue
                }
                visited[name] = struct{}{}
                profile, known := LookupProfile(name)
                if !known {
                        return nil, fmt.Errorf("unknown --profile %q (known: %s)",
                                name, strings.Join(profileNames(), ", "))
                }
                picked = append(picked, profile)
        }
        sort.Slice(picked, func(a, b int) bool {
                return picked[a].Name < picked[b].Name
        })
        return picked, nil
}

// contains reports whether needle is an element of haystack. A nil or
// empty haystack contains nothing.
func contains(haystack []string, needle string) bool {
        return slices.Index(haystack, needle) >= 0
}

package runner

import (
        "context"
        "encoding/json"
        "errors"
        "fmt"
        "io"
        "os"
        "strings"
        "syscall"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// SessionOptions configures a session-lane run.
//
// SessionsPath is where session fixtures are loaded from; empty
// defaults to DefaultSessionsDir. IncludeSelfTest is forwarded to
// LoadSessionDir. Harness is passed to StartEmulator for every cell.
// Profiles restricts the profiles iterated over (empty means all known
// profiles). Output selects the renderer, "text" (default) or "json".
// Out and Err are the output writers and default to os.Stdout and
// os.Stderr.
type SessionOptions struct {
        SessionsPath    string
        IncludeSelfTest bool
        Harness         HarnessOptions
        Profiles        []string
        Output          string
        Out             io.Writer
        Err             io.Writer
}

// RunSessions executes every session fixture, using one long-lived
// engine per session x profile cell, and renders the report (the JSON
// document, or the text summary after the streamed per-cell lines).
//
// A non-nil error means a runner-internal failure (bad options, load
// failure, no sessions, unknown profile, JSON write failure); only in
// the JSON-write case is the Report also returned.
func RunSessions(ctx context.Context, opts SessionOptions) (*Report, error) {
        prepared, err := prepareSessionOptions(opts)
        if err != nil {
                return nil, err
        }

        sessions, err := LoadSessionDir(prepared.SessionsPath, prepared.IncludeSelfTest)
        if err != nil {
                return nil, err
        }
        if len(sessions) == 0 {
                return nil, fmt.Errorf("no sessions found under %s", prepared.SessionsPath)
        }

        enabled, err := resolveProfiles(prepared.Profiles)
        if err != nil {
                return nil, err
        }

        report := iterateSessionMatrix(ctx, sessions, enabled, prepared)
        switch prepared.Output {
        case outputFormatJSON:
                if encErr := writeJSONReport(prepared.Out, report); encErr != nil {
                        return report, fmt.Errorf("write json report: %w", encErr)
                }
        default:
                writeTextSummary(prepared.Out, report)
        }
        return report, nil
}

// prepareSessionOptions fills in the unset fields of SessionOptions
// (Out, Err, Output, SessionsPath) and rejects an Output value that is
// neither "text" nor "json". It works on a copy, so the caller's value
// is not modified.
func prepareSessionOptions(opts SessionOptions) (SessionOptions, error) {
        if opts.Out == nil {
                opts.Out = os.Stdout
        }
        if opts.Err == nil {
                opts.Err = os.Stderr
        }
        if opts.Output == "" {
                opts.Output = outputFormatText
        }
        switch opts.Output {
        case outputFormatText, outputFormatJSON:
                // Recognised renderer.
        default:
                return opts, fmt.Errorf("unknown --output %q (want text or json)", opts.Output)
        }
        if opts.SessionsPath == "" {
                opts.SessionsPath = DefaultSessionsDir
        }
        return opts, nil
}

// iterateSessionMatrix walks the profile x session cross product,
// profile by profile. A session runs under a profile only if it lists
// that profile's name. Each cell is executed by runSession, appended to
// the report, and tallied into the summary counters; in text mode the
// result is also written to opts.Out as soon as it completes.
func iterateSessionMatrix(ctx context.Context, sessions []*Session, enabled []Profile, opts SessionOptions) *Report {
        report := &Report{SchemaVersion: JSONSchemaVersion}
        streamText := opts.Output == outputFormatText
        tally := &report.Summary
        for _, profile := range enabled {
                for _, session := range sessions {
                        if !contains(session.Profiles, profile.Name) {
                                continue
                        }
                        outcome := runSession(ctx, session, profile, opts)
                        report.Results = append(report.Results, outcome)
                        tally.Total++
                        switch outcome.Status {
                        case StatusPass:
                                tally.Passed++
                        case StatusFail:
                                tally.Failed++
                        case StatusSkip:
                                tally.Skipped++
                        }
                        if streamText {
                                writeTextResult(opts.Out, outcome)
                        }
                }
        }
        return report
}

// sessionProjectBase returns the project-scoped BigQuery v2 REST prefix for
// projectID on the gateway currently referenced by env.BaseURL.
func sessionProjectBase(env *EmulatorEnv, projectID string) string {
        const projectsPrefix = "/bigquery/v2/projects/"
        return env.BaseURL + projectsPrefix + projectID
}

// runSession executes one session under one profile and returns its Result.
//
// An emulator is started for the session and closed when the function
// returns. Steps run in order; the first failing step stops the session and
// its error becomes the result message. A failure in a session marked
// KnownFailing is reported through finishSessionMaybeKnown, which turns it
// into a skip. The result starts as StatusFail and only becomes StatusPass
// once every step has succeeded.
func runSession(ctx context.Context, sess *Session, p Profile, opts SessionOptions) Result {
        begin := time.Now()
        res := Result{
                Fixture: sess.Name,
                Path:    sess.Path,
                Profile: p.Name,
                Status:  StatusFail,
        }

        env, err := StartEmulator(ctx, opts.Harness, p)
        if err != nil {
                res.Message = "start emulator: " + err.Error()
                return markDuration(res, begin)
        }
        // The Close error is deliberately discarded.
        defer func() { _ = env.Close() }()

        for idx := range sess.Steps {
                // Re-read BaseURL each step: RestartEngine replaces the in-process
                // gateway httptest server and updates env.BaseURL.
                projectBase := sessionProjectBase(env, sess.ProjectID)
                label := fmt.Sprintf("[%d]", idx)
                stepErr := executeSessionStep(ctx, env, projectBase, sess.DefaultDataset, sess.Steps[idx], label)
                if stepErr == nil {
                        continue
                }
                res.Message = stepErr.Error()
                return finishSessionMaybeKnown(res, begin, sess.KnownFailing)
        }

        res.Status = StatusPass
        return markDuration(res, begin)
}

// executeSessionStep runs one session step and then evaluates the
// assertions attached to it. For a repeat group it instead runs every nested
// step Repeat times; a repeat group's own assertions are not evaluated.
//
// base is the project-scoped gateway URL at the time of the call. A restart
// step replaces the gateway and updates env.BaseURL, so base is re-derived
// from env after a restart and before every nested repeat step. Without
// that, requests issued after a restart would target the old address.
//
// indexPrefix labels the step in error messages, e.g. "[2]" or
// "[2].repeat(0)[1]". The first error stops execution and is returned.
func executeSessionStep(
        ctx context.Context,
        env *EmulatorEnv,
        base, defaultDataset string,
        step SessionStep,
        indexPrefix string,
) error {
        if step.Repeat > 0 {
                for n := 0; n < step.Repeat; n++ {
                        for j, nested := range step.Steps {
                                prefix := fmt.Sprintf("%s.repeat(%d)[%d]", indexPrefix, n, j)
                                // An earlier nested step may have restarted the engine.
                                base = refreshSessionBase(env, base)
                                if err := executeSessionStep(ctx, env, base, defaultDataset, nested, prefix); err != nil {
                                        return err
                                }
                        }
                }
                return nil
        }

        kind, err := step.kind()
        if err != nil {
                return fmt.Errorf("%s: %w", indexPrefix, err)
        }

        switch kind {
        case stepKindSetup:
                if err := RunSetupSteps(ctx, base, env.DataDir(), []SetupStep{step.asSetupStep()}, defaultDataset); err != nil {
                        return fmt.Errorf("%s: %w", indexPrefix, err)
                }
        case stepKindREST:
                if err := runRESTStep(ctx, base, step.REST); err != nil {
                        return fmt.Errorf("%s: %w", indexPrefix, err)
                }
        case stepKindRestart:
                if err := env.RestartEngine(ctx); err != nil {
                        return fmt.Errorf("%s restart: %w", indexPrefix, err)
                }
                // Assertions on this step must talk to the relaunched gateway.
                base = refreshSessionBase(env, base)
        case stepKindQuery:
                if err := runSessionQueryStep(ctx, base, defaultDataset, step, indexPrefix); err != nil {
                        return err
                }
        case stepKindAssertionOnly:
                // No operation; only the assertions below run.
        }

        return runSessionAssertions(ctx, env, base, step, indexPrefix)
}

// refreshSessionBase rebuilds a project-scoped base URL on top of env's
// current BaseURL, keeping the "/bigquery/v2/projects/<id>" suffix of base
// (the shape sessionProjectBase produces). base is returned unchanged when
// env is nil or base does not contain that suffix.
func refreshSessionBase(env *EmulatorEnv, base string) string {
        const marker = "/bigquery/v2/projects/"
        if env == nil {
                return base
        }
        i := strings.LastIndex(base, marker)
        if i < 0 {
                return base
        }
        return env.BaseURL + base[i:]
}

// runSessionQueryStep posts the step's query and checks the response against
// the step's expectation.
//
// The step-level DefaultDataset overrides the session-level defaultDataset
// when set. If the step expects an error, only the error shape is compared
// and the HTTP status is not otherwise checked. Otherwise the response must
// be 2xx, must decode as a QueryResponse, and its rows must match
// ExpectRows in order. indexPrefix prefixes every returned error.
func runSessionQueryStep(
        ctx context.Context,
        base, defaultDataset string,
        step SessionStep,
        indexPrefix string,
) error {
        dataset := step.DefaultDataset
        if dataset == "" {
                dataset = defaultDataset
        }

        status, body, err := postQueryWithDefaultDataset(ctx, base, step.Query, dataset)
        if err != nil {
                return fmt.Errorf("%s query rpc: %w", indexPrefix, err)
        }

        if wantErr := step.ExpectError; wantErr != nil {
                diff := errorDiff(*wantErr, status, body)
                if diff == "" {
                        return nil
                }
                return fmt.Errorf("%s error mismatch: %s", indexPrefix, diff)
        }

        if succeeded := status >= 200 && status < 300; !succeeded {
                return fmt.Errorf("%s query failed with HTTP %d: %s",
                        indexPrefix, status, snippet(body))
        }

        var resp bqtypes.QueryResponse
        if err := json.Unmarshal(body, &resp); err != nil {
                return fmt.Errorf("%s decode QueryResponse: %w", indexPrefix, err)
        }

        diff := rowDiff(Expectation{Match: MatchOrdered, Rows: step.ExpectRows}, resp.Schema, resp.Rows)
        if diff == "" {
                return nil
        }
        return fmt.Errorf("%s row mismatch: %s", indexPrefix, diff)
}

// runSessionAssertions evaluates the optional expect_alive and
// expect_table_list assertions of a step, in that order.
//
// expect_alive compares the wanted liveness with env.EngineAlive().
// expect_table_list issues a tables.list call against base. A step with
// neither assertion succeeds. indexPrefix prefixes every returned error.
func runSessionAssertions(
        ctx context.Context,
        env *EmulatorEnv,
        base string,
        step SessionStep,
        indexPrefix string,
) error {
        if want := step.ExpectAlive; want != nil {
                running := env.EngineAlive()
                switch {
                case *want && !running:
                        return fmt.Errorf(
                                "%s expect_alive=true but engine subprocess has exited (signal: aborted or non-zero exit)",
                                indexPrefix,
                        )
                case !*want && running:
                        return fmt.Errorf("%s expect_alive=false but engine subprocess is still running", indexPrefix)
                }
        }

        if step.ExpectTableList == nil {
                return nil
        }
        if err := assertTableList(ctx, base, step.ExpectTableList); err != nil {
                return fmt.Errorf("%s %w", indexPrefix, err)
        }
        return nil
}

// runRESTStep performs the REST call described by rest and checks the
// response status.
//
// rest.Path is resolved against base by resolveRESTURL. A non-nil rest.Body
// is sent as JSON; otherwise no body is sent. When rest.ExpectStatus is
// zero any 2xx status is accepted, otherwise the status must equal it
// exactly. Failures include a snippet of the response body.
func runRESTStep(ctx context.Context, base string, rest *RESTStep) error {
        target, err := resolveRESTURL(base, rest.Path)
        if err != nil {
                return err
        }

        var payload []byte
        if rest.Body != nil {
                if payload, err = json.Marshal(rest.Body); err != nil {
                        return fmt.Errorf("marshal rest body: %w", err)
                }
        }

        status, respBody, err := DoHTTPRequest(ctx, rest.Method, target, payload)
        if err != nil {
                return err
        }

        if want := rest.ExpectStatus; want != 0 {
                if status == want {
                        return nil
                }
                return fmt.Errorf("rest %s %s -> %d, want %d: %s",
                        rest.Method, rest.Path, status, want, snippet(respBody))
        }
        if status >= 200 && status < 300 {
                return nil
        }
        return fmt.Errorf("rest %s %s -> %d: %s",
                rest.Method, rest.Path, status, snippet(respBody))
}

// resolveRESTURL turns a REST step path into a full URL.
//
// Surrounding whitespace is trimmed first. Then:
//   - an empty path is an error;
//   - a path starting with http:// or https:// is returned as is;
//   - a path starting with "/" is appended to the part of base before its
//     first "/bigquery/" segment, or to base itself when base has no such
//     segment;
//   - any other path is treated as relative to base, joined with one "/".
func resolveRESTURL(base, path string) (string, error) {
        trimmed := strings.TrimSpace(path)
        switch {
        case trimmed == "":
                return "", errors.New("rest path is empty")
        case strings.HasPrefix(trimmed, "http://"), strings.HasPrefix(trimmed, "https://"):
                return trimmed, nil
        case strings.HasPrefix(trimmed, "/"):
                // Absolute from gateway host: strip duplicate /bigquery prefix if present.
                if idx := strings.Index(base, "/bigquery/"); idx >= 0 {
                        return base[:idx] + trimmed, nil
                }
                return base + trimmed, nil
        default:
                // trimmed has no leading slash in this branch.
                return strings.TrimSuffix(base, "/") + "/" + trimmed, nil
        }
}

// assertTableList lists the tables of exp.Dataset under base and checks the
// returned table IDs against exp.
//
// It fails when the request errors, the status is not 2xx, the body cannot
// be decoded, or tableListDiff reports missing or forbidden tables.
func assertTableList(ctx context.Context, base string, exp *TableListExpect) error {
        endpoint := fmt.Sprintf("%s/datasets/%s/tables", base, exp.Dataset)
        status, body, err := DoHTTPRequest(ctx, httpMethodGet, endpoint, nil)
        if err != nil {
                return err
        }
        if ok := status >= 200 && status < 300; !ok {
                return fmt.Errorf("tables.list -> %d: %s", status, snippet(body))
        }

        tableIDs, err := parseTableListIDs(body)
        if err != nil {
                return err
        }

        diff := tableListDiff(exp, tableIDs)
        if diff == "" {
                return nil
        }
        return fmt.Errorf("expect_table_list: %s", diff)
}

// parseTableListIDs extracts tables[].tableReference.tableId from a
// tables.list JSON response, preserving response order.
//
// A response without a "tables" array yields an empty, non-nil slice. A
// body that is not valid JSON yields an error.
func parseTableListIDs(body []byte) ([]string, error) {
        type tableRef struct {
                TableID string `json:"tableId"`
        }
        type tableEntry struct {
                TableReference tableRef `json:"tableReference"`
        }
        var payload struct {
                Tables []tableEntry `json:"tables"`
        }
        if err := json.Unmarshal(body, &payload); err != nil {
                return nil, fmt.Errorf("decode tableList: %w", err)
        }

        ids := make([]string, len(payload.Tables))
        for i := range payload.Tables {
                ids[i] = payload.Tables[i].TableReference.TableID
        }
        return ids, nil
}

// tableListDiff compares the actual table IDs with the expectation and
// returns a human-readable description of the differences.
//
// It returns "" when every ID in exp.Contains is present and no ID in
// exp.NotContains is present. Otherwise the message lists the missing
// tables, then the forbidden tables that were found, then all actual IDs.
func tableListDiff(exp *TableListExpect, actual []string) string {
        present := make(map[string]bool, len(actual))
        for i := range actual {
                present[actual[i]] = true
        }

        var absent, forbidden []string
        for _, id := range exp.Contains {
                if present[id] {
                        continue
                }
                absent = append(absent, id)
        }
        for _, id := range exp.NotContains {
                if present[id] {
                        forbidden = append(forbidden, id)
                }
        }
        if len(absent)+len(forbidden) == 0 {
                return ""
        }

        var segments []string
        if len(absent) > 0 {
                segments = append(segments, fmt.Sprintf("missing tables: [%s];", strings.Join(absent, ", ")))
        }
        if len(forbidden) > 0 {
                segments = append(segments, fmt.Sprintf("forbidden tables present: [%s];", strings.Join(forbidden, ", ")))
        }
        segments = append(segments, fmt.Sprintf("actual table ids: [%s]", strings.Join(actual, ", ")))
        return strings.Join(segments, " ")
}

// finishSessionMaybeKnown stamps the duration on r and, for a session marked
// known-failing, turns a failed result into a skipped one. The original
// failure message is kept behind a "known_failing" prefix. Results with any
// other status are returned with only the duration applied.
func finishSessionMaybeKnown(r Result, started time.Time, knownFailing bool) Result {
        timed := markDuration(r, started)
        if !knownFailing || timed.Status != StatusFail {
                return timed
        }
        timed.Status = StatusSkip
        timed.Message = "known_failing (expected divergence): " + timed.Message
        return timed
}

// EngineAlive reports whether the spawned engine subprocess is still running.
// Connected-mode envs (no subprocess) always return true, as does a nil
// receiver.
//
// A recorded ProcessState that reports Exited means the process is gone.
// Otherwise the process is probed with signal 0, and an error from Signal
// is treated as "not running".
func (e *EmulatorEnv) EngineAlive() bool {
        if e == nil || e.cmd == nil || e.cmd.Process == nil {
                return true
        }
        if state := e.cmd.ProcessState; state != nil && state.Exited() {
                return false
        }
        return e.cmd.Process.Signal(syscall.Signal(0)) == nil
}

package runner

import (
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "sort"
        "strconv"
        "strings"

        "gopkg.in/yaml.v3"
)

// Session is the in-memory shape of a multi-step session YAML file under
// conformance/sessions/. Every step runs against a single long-lived engine
// process unless a restart step stops and relaunches it (same --data_dir).
//
// Name and Steps are required; normalize rejects a session missing either.
// ProjectID and Profiles are optional in YAML and are filled in by normalize
// when omitted. Every listed profile must be a known profile. DefaultDataset
// is the dataset passed to query and SQL steps that do not override it.
// KnownFailing makes a failing run count as skipped instead of failed.
type Session struct {
        Name            string        `yaml:"name"`
        Description     string        `yaml:"description,omitempty"`
        Profiles        []string      `yaml:"profiles,omitempty"`
        ProjectID       string        `yaml:"project_id,omitempty"`
        DefaultDataset  string        `yaml:"default_dataset,omitempty"`
        KnownFailing    bool          `yaml:"known_failing,omitempty"`
        KnownFailingRef string        `yaml:"known_failing_ref,omitempty"`
        Steps           []SessionStep `yaml:"steps"`

        // Path is the file the session was loaded from. It is set by the
        // loader and never read from YAML.
        Path string `yaml:"-"`
}

// SessionStep is one ordered operation or assertion in a session. Setup-style
// fields mirror Fixture.Setup; assertion fields may stand alone or attach to
// a query step. A repeat group sets Repeat and nested Steps.
//
// Outside a repeat group a step must set exactly one operation: a setup
// field (Dataset, Table, Rows, or SQL), Query, REST, or Restart. A step with
// no operation is valid only when it carries an assertion (ExpectAlive,
// ExpectTableList, or ExpectError).
type SessionStep struct {
        // Setup-style operation fields, plus Query and its optional
        // per-step default dataset override.
        Dataset        string      `yaml:"dataset,omitempty"`
        Table          *TableSetup `yaml:"table,omitempty"`
        Rows           *RowsSetup  `yaml:"rows,omitempty"`
        SQL            string      `yaml:"sql,omitempty"`
        Query          string      `yaml:"query,omitempty"`
        DefaultDataset string      `yaml:"default_dataset,omitempty"`

        // REST is a generic gateway call.
        REST *RESTStep `yaml:"rest,omitempty"`

        // Assertions. A query step must set ExpectRows or ExpectError.
        ExpectRows      []map[string]any `yaml:"expect_rows,omitempty"`
        ExpectError     *ExpectedError   `yaml:"expect_error,omitempty"`
        ExpectTableList *TableListExpect `yaml:"expect_table_list,omitempty"`
        ExpectAlive     *bool            `yaml:"expect_alive,omitempty"`

        // Repeat > 0 marks a repeat group whose nested Steps run Repeat times.
        // Restart requests an engine restart.
        Repeat  int           `yaml:"repeat,omitempty"`
        Steps   []SessionStep `yaml:"steps,omitempty"`
        Restart bool          `yaml:"restart,omitempty"`
}

// RESTStep is a generic gateway REST call (method + path + optional JSON body).
//
// Method and Path are required. Path may be a full http(s) URL, a path
// starting with "/" (resolved from the gateway host), or a path relative to
// the session's project base URL. ExpectStatus of zero accepts any 2xx
// response; a non-zero value must match the response status exactly.
type RESTStep struct {
        Method       string         `yaml:"method"`
        Path         string         `yaml:"path"`
        Body         map[string]any `yaml:"body,omitempty"`
        ExpectStatus int            `yaml:"expect_status,omitempty"`
}

// TableListExpect asserts tables.list contains (or omits) table IDs.
//
// Dataset is required and names the dataset whose tables are listed. At
// least one of Contains (IDs that must be present) and NotContains (IDs that
// must be absent) has to be set.
type TableListExpect struct {
        Dataset     string   `yaml:"dataset"`
        Contains    []string `yaml:"contains,omitempty"`
        NotContains []string `yaml:"not_contains,omitempty"`
}

// DefaultSessionsDir is the committed session fixture root. It is the
// sessions path used when the session options leave SessionsPath empty.
const DefaultSessionsDir = "conformance/sessions"

// LoadSession reads the session YAML file at path, then parses and validates
// it. Read failures are wrapped with the path; parse and validation
// failures come from loadSessionBytes.
func LoadSession(path string) (*Session, error) {
        raw, readErr := os.ReadFile(path) //nolint:gosec // path is CLI-controlled
        if readErr != nil {
                return nil, fmt.Errorf("read %s: %w", path, readErr)
        }
        return loadSessionBytes(raw, path)
}

// loadSessionBytes decodes session YAML from data, records path on the
// result, and normalizes and validates it.
//
// Decoding is strict: a YAML key that does not map to a known field is a
// parse error. path is used for the Session's Path and for error context.
func loadSessionBytes(data []byte, path string) (*Session, error) {
        decoder := yaml.NewDecoder(strings.NewReader(string(data)))
        decoder.KnownFields(true)

        sess := new(Session)
        if err := decoder.Decode(sess); err != nil {
                return nil, fmt.Errorf("parse %s: %w", path, err)
        }

        sess.Path = path
        if err := sess.normalize(); err != nil {
                return nil, fmt.Errorf("validate %s: %w", path, err)
        }
        return sess, nil
}

// LoadSessionDir walks a directory (or loads a single file) and returns every
// loadable session, sorted by path.
//
// When pathOrDir is a file it is loaded directly, whatever its name or
// extension. When it is a directory, only files with a .yaml or .yml
// extension (case-insensitive) are loaded. Directories and files whose
// basename starts with "_" are skipped unless includeSelfTest is true. The
// walk root itself is always descended into, even when its own name starts
// with "_". The first session that fails to load aborts the walk and its
// error is returned.
func LoadSessionDir(pathOrDir string, includeSelfTest bool) ([]*Session, error) {
        info, err := os.Stat(pathOrDir)
        if err != nil {
                return nil, fmt.Errorf("stat %s: %w", pathOrDir, err)
        }
        if !info.IsDir() {
                s, err := LoadSession(pathOrDir)
                if err != nil {
                        return nil, err
                }
                return []*Session{s}, nil
        }
        var sessions []*Session
        walkErr := filepath.Walk(pathOrDir, func(p string, fi os.FileInfo, walkErr error) error {
                if walkErr != nil {
                        return walkErr
                }
                selfTest := strings.HasPrefix(filepath.Base(p), "_")
                if fi.IsDir() {
                        // Walk reports the root with exactly the path it was given, so
                        // compare full paths. Comparing basenames would also exempt a
                        // nested "_x" directory that shares the root's name.
                        if p != pathOrDir && selfTest && !includeSelfTest {
                                return filepath.SkipDir
                        }
                        return nil
                }
                if selfTest && !includeSelfTest {
                        return nil
                }
                ext := strings.ToLower(filepath.Ext(p))
                if ext != ".yaml" && ext != ".yml" {
                        return nil
                }
                s, err := LoadSession(p)
                if err != nil {
                        return err
                }
                sessions = append(sessions, s)
                return nil
        })
        if walkErr != nil {
                return nil, walkErr
        }
        sort.Slice(sessions, func(i, j int) bool { return sessions[i].Path < sessions[j].Path })
        return sessions, nil
}

// normalize validates a freshly decoded session and fills in defaults.
//
// Name and at least one step are required. An empty ProjectID becomes
// "proj-session-" plus the sanitized name, and an empty Profiles list
// becomes a copy of defaultProfiles. Every profile must be a known profile
// and every step must pass its own validation; the first problem found is
// returned.
func (s *Session) normalize() error {
        switch {
        case strings.TrimSpace(s.Name) == "":
                return errors.New("name is required")
        case len(s.Steps) == 0:
                return errors.New("steps must list at least one entry")
        }

        if s.ProjectID == "" {
                s.ProjectID = "proj-session-" + sanitizeID(s.Name)
        }
        if len(s.Profiles) == 0 {
                s.Profiles = append([]string(nil), defaultProfiles...)
        }

        registered := KnownProfiles()
        known := make(map[string]bool, len(registered))
        for _, profile := range registered {
                known[profile.Name] = true
        }
        for _, name := range s.Profiles {
                if known[name] {
                        continue
                }
                return fmt.Errorf("unknown profile %q (known: %s)",
                        name, strings.Join(profileNames(), ", "))
        }

        for i := range s.Steps {
                if err := s.Steps[i].validate(strconv.Itoa(i)); err != nil {
                        return err
                }
        }
        return nil
}

// validate checks that the step is well formed. indexPrefix is the step's
// position label used in error messages ("steps[<indexPrefix>]: ...").
//
// A repeat group must have nested steps, each validated recursively; the
// group's other fields are not inspected. Otherwise the step's kind decides
// the checks: a query needs expect_rows or expect_error, an assertion-only
// step needs at least one assertion, and setup and REST steps delegate to
// their own validators. An attached expect_table_list is validated as well.
func (step *SessionStep) validate(indexPrefix string) error {
        if step.Repeat > 0 {
                if len(step.Steps) == 0 {
                        return fmt.Errorf("steps[%s]: repeat requires nested steps", indexPrefix)
                }
                for i := range step.Steps {
                        label := fmt.Sprintf("%s.repeat[%d]", indexPrefix, i)
                        if err := step.Steps[i].validate(label); err != nil {
                                return err
                        }
                }
                return nil
        }

        wrap := func(err error) error {
                return fmt.Errorf("steps[%s]: %w", indexPrefix, err)
        }

        kind, err := step.kind()
        if err != nil {
                return wrap(err)
        }

        switch kind {
        case stepKindRestart:
                // kind() only reports a restart when step.Restart is set.
                // NOTE(review): this returns before expect_table_list is
                // validated, although assertions still run after a restart;
                // confirm this is intended.
                return nil
        case stepKindQuery:
                if len(step.ExpectRows) == 0 && step.ExpectError == nil {
                        return fmt.Errorf("steps[%s]: query step requires expect_rows or expect_error", indexPrefix)
                }
        case stepKindAssertionOnly:
                if step.ExpectAlive == nil && step.ExpectTableList == nil && step.ExpectError == nil {
                        return fmt.Errorf(
                                "steps[%s]: assertion step must set expect_alive, expect_table_list, or expect_error",
                                indexPrefix,
                        )
                }
        case stepKindSetup:
                if err := step.asSetupStep().validate(); err != nil {
                        return wrap(err)
                }
        case stepKindREST:
                if err := step.REST.validate(); err != nil {
                        return wrap(err)
                }
        }

        if step.ExpectTableList == nil {
                return nil
        }
        if err := step.ExpectTableList.validate(); err != nil {
                return wrap(err)
        }
        return nil
}

// validate requires a non-blank method and a non-blank path. The method is
// checked first.
func (r *RESTStep) validate() error {
        switch {
        case strings.TrimSpace(r.Method) == "":
                return errors.New("rest.method is required")
        case strings.TrimSpace(r.Path) == "":
                return errors.New("rest.path is required")
        default:
                return nil
        }
}

// validate requires a dataset and at least one of contains / not_contains.
// The dataset is checked first.
func (e *TableListExpect) validate() error {
        switch {
        case e.Dataset == "":
                return errors.New("expect_table_list.dataset is required")
        case len(e.Contains)+len(e.NotContains) == 0:
                return errors.New("expect_table_list must set contains and/or not_contains")
        default:
                return nil
        }
}

// sessionStepKind identifies which single operation a non-repeat session
// step performs, as decided by SessionStep.kind.
type sessionStepKind int

const (
        // stepKindSetup: the step sets dataset, table, rows, or sql.
        stepKindSetup sessionStepKind = iota
        // stepKindQuery: the step sets query.
        stepKindQuery
        // stepKindREST: the step sets rest.
        stepKindREST
        // stepKindRestart: the step sets restart.
        stepKindRestart
        // stepKindAssertionOnly: the step has no operation, only assertions.
        stepKindAssertionOnly
)

// kind classifies a non-repeat step by the single operation it sets.
//
// Repeat groups are rejected because callers handle them separately. A step
// setting more than one operation (setup fields, query, rest, restart) is
// an error. A step setting none is assertion-only if it carries any
// assertion, and otherwise an "empty step" error.
func (step *SessionStep) kind() (sessionStepKind, error) {
        if step.Repeat > 0 {
                return 0, errors.New("repeat groups are handled separately")
        }

        isSetup := step.Dataset != "" || step.Table != nil || step.Rows != nil ||
                strings.TrimSpace(step.SQL) != ""
        operations := []struct {
                present bool
                kind    sessionStepKind
        }{
                {isSetup, stepKindSetup},
                {strings.TrimSpace(step.Query) != "", stepKindQuery},
                {step.REST != nil, stepKindREST},
                {step.Restart, stepKindRestart},
        }

        var found []sessionStepKind
        for _, op := range operations {
                if op.present {
                        found = append(found, op.kind)
                }
        }

        switch len(found) {
        case 1:
                return found[0], nil
        case 0:
                hasAssertion := step.ExpectAlive != nil || step.ExpectTableList != nil || step.ExpectError != nil
                if hasAssertion {
                        return stepKindAssertionOnly, nil
                }
                return 0, errors.New("empty step")
        default:
                return 0, errors.New("step must set exactly one operation")
        }
}

// asSetupStep projects the step's setup-style fields (dataset, table, rows,
// sql) onto a SetupStep. Every other SetupStep field keeps its zero value.
func (step *SessionStep) asSetupStep() SetupStep {
        var setup SetupStep
        setup.Dataset = step.Dataset
        setup.Table = step.Table
        setup.Rows = step.Rows
        setup.SQL = step.SQL
        return setup
}

package runner

import (
        "context"
        "encoding/json"
        "errors"
        "fmt"
        "os"
        "path/filepath"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/external/connectionfixture"
)

// RunSetupSteps executes every setup step, in order, against the gateway
// base URL and stops at the first failure. The failing step's error is
// wrapped with its index ("setup[i]: ...").
// Exported for sub-lanes (differential replay, production recorders).
func RunSetupSteps(ctx context.Context, base string, dataDir string, steps []SetupStep, defaultDataset string) error {
        for i := range steps {
                err := runSetupStep(ctx, base, dataDir, steps[i], defaultDataset)
                if err == nil {
                        continue
                }
                return fmt.Errorf("setup[%d]: %w", i, err)
        }
        return nil
}

// ValidateExported exposes setup-step validation for corpus loaders. It
// returns exactly what the unexported validate method returns.
func (s SetupStep) ValidateExported() error {
        return s.validate()
}

// runSetupStep dispatches one setup step to the matching helper.
// Errors bubble up unchanged; the caller wraps them with the step
// index for the diff message.
//
// The first populated field wins, checked in this order: dataset, table,
// rows, sql, row access policy, column governance, connection fixture. A
// step with none of them set is an error.
func runSetupStep(ctx context.Context, base string, dataDir string, step SetupStep, defaultDataset string) error {
        if step.Dataset != "" {
                return setupDataset(ctx, base, step.Dataset)
        }
        if step.Table != nil {
                return setupTable(ctx, base, step.Table)
        }
        if step.Rows != nil {
                return setupRows(ctx, base, step.Rows)
        }
        if strings.TrimSpace(step.SQL) != "" {
                return setupSQL(ctx, base, step.SQL, defaultDataset)
        }
        if step.RowAccessPolicy != nil {
                return setupRowAccessPolicy(ctx, base, step.RowAccessPolicy)
        }
        if step.ColumnGovernance != nil {
                return setupColumnGovernance(ctx, base, step.ColumnGovernance)
        }
        if step.ConnectionFixture != nil {
                return setupConnectionFixture(dataDir, step.ConnectionFixture)
        }
        return errors.New("empty setup step (validated at load time)")
}

// setupConnectionFixture copies a connection fixture tree into the
// emulator's data directory via connectionfixture.CopyTree.
//
// dataDir must be non-empty. A relative cf.SourceDir is resolved against
// repoRoot(); an absolute one is used as is.
func setupConnectionFixture(dataDir string, cf *ConnectionFixtureSetup) error {
        if dataDir == "" {
                return errors.New("connection_fixture requires a spawned emulator data_dir")
        }
        if filepath.IsAbs(cf.SourceDir) {
                return connectionfixture.CopyTree(dataDir, cf.ConnectionID, cf.SourceDir)
        }
        resolved := filepath.Join(repoRoot(), cf.SourceDir)
        return connectionfixture.CopyTree(dataDir, cf.ConnectionID, resolved)
}

// repoRoot returns the process's current working directory, or "." when it
// cannot be determined. Relative fixture paths are resolved against it.
func repoRoot() string {
        if wd, err := os.Getwd(); err == nil {
                return wd
        }
        return "."
}

// setupDataset issues a `datasets.insert` for the synthesized
// fixture project / dataset pair. Location is hardcoded to US to
// match the gateway's default; fixtures that want a different
// location have to use a SQL setup step.
//
// The project ID is taken from base. The request body is produced with
// json.Marshal so that IDs containing quotes, backslashes, or control
// characters are escaped instead of yielding malformed JSON. A non-2xx
// response is an error that includes a snippet of the response body.
func setupDataset(ctx context.Context, base, dataset string) error {
        type datasetReference struct {
                ProjectID string `json:"projectId"`
                DatasetID string `json:"datasetId"`
        }
        payload := struct {
                DatasetReference datasetReference `json:"datasetReference"`
                Location         string           `json:"location"`
        }{
                DatasetReference: datasetReference{
                        ProjectID: projectIDFromBase(base),
                        DatasetID: dataset,
                },
                Location: "US",
        }
        body, err := json.Marshal(payload)
        if err != nil {
                return fmt.Errorf("marshal dataset body: %w", err)
        }
        status, respBody, err := doRequest(ctx, base+"/datasets", body)
        if err != nil {
                return err
        }
        if status < 200 || status >= 300 {
                return fmt.Errorf("datasets.insert -> %d: %s", status, snippet(respBody))
        }
        return nil
}

// setupTable issues a `tables.insert` with the fixture's column
// schema. STRUCT children round-trip through columnToTableField.
//
// The project ID is taken from base. The external data configuration, view
// definition, and schema are each included only when the fixture sets them.
// A non-2xx response is an error that includes a snippet of the response
// body.
func setupTable(ctx context.Context, base string, t *TableSetup) error {
        type schemaBody struct {
                Fields []bqtypes.TableFieldSchema `json:"fields"`
        }
        type insertBody struct {
                TableReference            bqtypes.TableReference             `json:"tableReference"`
                Schema                    *schemaBody                        `json:"schema,omitempty"`
                ExternalDataConfiguration *bqtypes.ExternalDataConfiguration `json:"externalDataConfiguration,omitempty"`
                View                      *bqtypes.ViewDefinition            `json:"view,omitempty"`
        }

        req := insertBody{
                TableReference: bqtypes.TableReference{
                        ProjectID: projectIDFromBase(base),
                        DatasetID: t.Dataset,
                        TableID:   t.ID,
                },
        }
        if ext := t.External; ext != nil {
                req.ExternalDataConfiguration = &bqtypes.ExternalDataConfiguration{
                        SourceFormat: ext.SourceFormat,
                        SourceURIs:   append([]string(nil), ext.SourceURIs...),
                        Autodetect:   ext.Autodetect,
                }
        }
        if t.View != nil {
                req.View = &bqtypes.ViewDefinition{Query: t.View.Query}
        }
        if n := len(t.Schema); n > 0 {
                fields := make([]bqtypes.TableFieldSchema, 0, n)
                for _, col := range t.Schema {
                        fields = append(fields, columnToTableField(col))
                }
                req.Schema = &schemaBody{Fields: fields}
        }

        payload, err := json.Marshal(req)
        if err != nil {
                return fmt.Errorf("marshal table body: %w", err)
        }

        endpoint := fmt.Sprintf("%s/datasets/%s/tables", base, t.Dataset)
        status, respBody, err := doRequest(ctx, endpoint, payload)
        if err != nil {
                return err
        }
        if ok := status >= 200 && status < 300; !ok {
                return fmt.Errorf("tables.insert -> %d: %s", status, snippet(respBody))
        }
        return nil
}

// setupRows issues a `tabledata.insertAll`. It is the only way to
// seed rows on the DuckDB engine today: INSERT VALUES returns
// UNIMPLEMENTED. The wire shape matches Google's REST API spec
// (each row is wrapped in `{json: {...}}`).
//
// A non-2xx response is an error that includes a snippet of the response
// body.
func setupRows(ctx context.Context, base string, rs *RowsSetup) error {
        type insertAllRow struct {
                JSON map[string]any `json:"json"`
        }
        type insertAllRequest struct {
                Kind string         `json:"kind"`
                Rows []insertAllRow `json:"rows"`
        }

        wrapped := make([]insertAllRow, len(rs.Rows))
        for i, row := range rs.Rows {
                wrapped[i] = insertAllRow{JSON: row}
        }
        payload, err := json.Marshal(insertAllRequest{
                Kind: "bigquery#tableDataInsertAllRequest",
                Rows: wrapped,
        })
        if err != nil {
                return fmt.Errorf("marshal insertAll body: %w", err)
        }

        endpoint := fmt.Sprintf("%s/datasets/%s/tables/%s/insertAll",
                base, rs.Dataset, rs.Table)
        status, respBody, err := doRequest(ctx, endpoint, payload)
        if err != nil {
                return err
        }
        if ok := status >= 200 && status < 300; !ok {
                return fmt.Errorf("tabledata.insertAll -> %d: %s",
                        status, snippet(respBody))
        }
        return nil
}

// setupSQL runs an arbitrary statement through the gateway's
// `/queries` endpoint. Used for setup phases that do not fit the
// dataset/table/rows shape (e.g. preparing a temp UDF).
//
// The request body is built by marshalJobsQueryBody from the statement and
// defaultDataset. A non-2xx response is an error that includes a snippet of
// the response body.
func setupSQL(ctx context.Context, base, sql, defaultDataset string) error {
        payload, err := marshalJobsQueryBody(sql, defaultDataset)
        if err != nil {
                return err
        }

        status, respBody, err := doRequest(ctx, base+"/queries", payload)
        if err != nil {
                return err
        }
        if ok := status >= 200 && status < 300; !ok {
                return fmt.Errorf("setup sql -> %d: %s", status, snippet(respBody))
        }
        return nil
}

// columnToTableField converts a YAML-decoded SchemaColumn into the
// `bqtypes.TableFieldSchema` wire shape. Child columns are converted
// recursively so nested STRUCT fields carry over. Policy tags are copied
// into a fresh slice and are only attached when the column has any.
func columnToTableField(c SchemaColumn) bqtypes.TableFieldSchema {
        field := bqtypes.TableFieldSchema{
                Name:        c.Name,
                Type:        c.Type,
                Mode:        c.Mode,
                Description: c.Description,
        }
        if len(c.PolicyTags) > 0 {
                tags := append([]string(nil), c.PolicyTags...)
                field.PolicyTags = &bqtypes.PolicyTagList{Names: tags}
        }
        for i := range c.Fields {
                child := columnToTableField(c.Fields[i])
                field.Fields = append(field.Fields, child)
        }
        return field
}

// setupRowAccessPolicy posts a row access policy for the table named in rap
// to the table's rowAccessPolicies endpoint.
//
// The project ID is taken from base. Grantees are included only when the
// fixture lists any. A non-2xx response is an error that includes a snippet
// of the response body.
func setupRowAccessPolicy(ctx context.Context, base string, rap *RowAccessPolicySetup) error {
        reference := map[string]string{
                "projectId": projectIDFromBase(base),
                "datasetId": rap.Dataset,
                "tableId":   rap.Table,
                "policyId":  rap.PolicyID,
        }
        policy := map[string]any{
                "rowAccessPolicyReference": reference,
                "filterPredicate":          rap.FilterPredicate,
        }
        if len(rap.Grantees) > 0 {
                policy["grantees"] = rap.Grantees
        }

        payload, err := json.Marshal(policy)
        if err != nil {
                return fmt.Errorf("marshal row access policy: %w", err)
        }

        endpoint := fmt.Sprintf("%s/datasets/%s/tables/%s/rowAccessPolicies", base, rap.Dataset, rap.Table)
        status, respBody, err := doRequest(ctx, endpoint, payload)
        if err != nil {
                return err
        }
        if ok := status >= 200 && status < 300; !ok {
                return fmt.Errorf("rowAccessPolicies.insert -> %d: %s", status, snippet(respBody))
        }
        return nil
}

// setupColumnGovernance patches one column of a table with a mask kind and,
// when set, a policy tag.
//
// The patch body carries a schema with the single field described by cg.
// The field type is always sent as "STRING". A non-2xx response is an error
// that includes a snippet of the response body.
func setupColumnGovernance(ctx context.Context, base string, cg *ColumnGovernanceSetup) error {
        column := map[string]any{
                "name":     cg.Column,
                "type":     "STRING",
                "maskKind": cg.MaskKind,
        }
        if tag := cg.PolicyTag; tag != "" {
                column["policyTags"] = map[string]any{"names": []string{tag}}
        }
        schema := map[string]any{"fields": []map[string]any{column}}

        payload, err := json.Marshal(map[string]any{"schema": schema})
        if err != nil {
                return fmt.Errorf("marshal column governance patch: %w", err)
        }

        endpoint := fmt.Sprintf("%s/datasets/%s/tables/%s", base, cg.Dataset, cg.Table)
        status, respBody, err := doPatchRequest(ctx, endpoint, payload)
        if err != nil {
                return err
        }
        if ok := status >= 200 && status < 300; !ok {
                return fmt.Errorf("tables.patch column governance -> %d: %s", status, snippet(respBody))
        }
        return nil
}

// projectIDFromBase returns everything after the last "/projects/"
// marker in base, which for a URL shaped like
// .../bigquery/v2/projects/<projectId> is the project ID. It returns
// "" when the marker is absent. Deriving the ID from the base URL
// spares the setup-step builders an extra projectId parameter.
func projectIDFromBase(base string) string {
        const marker = "/projects/"
        if at := strings.LastIndex(base, marker); at >= 0 {
                return base[at+len(marker):]
        }
        return ""
}

package bqtypes

import "fmt"

// EncryptionConfiguration is the BigQuery REST encryptionConfiguration
// sub-object on tables and load-job destinationEncryptionConfiguration.
// The emulator stores kmsKeyName as opaque metadata only; it does not
// call Cloud KMS.
type EncryptionConfiguration struct {
        // KMSKeyName is serialized as kmsKeyName and left out of the
        // JSON output when empty.
        KMSKeyName string `json:"kmsKeyName,omitempty"`
}

// EmulatorCMEKKeyUSCentral builds the KMS crypto key resource name used
// by regional CMEK samples: the key cryptoKeyID in the "emulator" key
// ring of us-central1 under projectID (matches
// bqtestutil.EmulatorCMEKKeyUSCentral).
func EmulatorCMEKKeyUSCentral(projectID, cryptoKeyID string) string {
        const layout = "projects/%s/locations/us-central1/keyRings/emulator/cryptoKeys/%s"
        return fmt.Sprintf(layout, projectID, cryptoKeyID)
}

package bqtypes

import (
        "bytes"
        "encoding/json"
        "fmt"
        "strconv"
        "strings"
)

// Type names compared against QueryParameterType.Type to select the
// STRUCT and ARRAY encodings of query parameters.
const (
        parameterTypeStruct = "STRUCT"
        parameterTypeArray  = "ARRAY"
)

// UnmarshalJSON decodes a BigQuery REST parameter value whose `value`
// may arrive as a JSON string, number, or bool. Scalars are normalized
// to the string form the engine expects in `value_json`. Elements of
// `arrayValues` and `structValues` are decoded recursively; an element
// that is not a parameter-value object is treated as a bare scalar.
func (v *QueryParameterValue) UnmarshalJSON(data []byte) error {
        var wire struct {
                Value        json.RawMessage            `json:"value"`
                ArrayValues  []json.RawMessage          `json:"arrayValues"`
                StructValues map[string]json.RawMessage `json:"structValues"`
        }
        if err := json.Unmarshal(data, &wire); err != nil {
                return err
        }
        // decodeNested handles one array element or struct field: a
        // nested parameter value if it decodes as one, otherwise a bare
        // scalar (number/bool/string).
        decodeNested := func(token json.RawMessage) QueryParameterValue {
                var child QueryParameterValue
                if err := json.Unmarshal(token, &child); err != nil {
                        child.Value = normalizeParameterScalarJSON(token)
                }
                return child
        }
        if wire.Value != nil {
                v.Value = normalizeParameterScalarJSON(wire.Value)
        }
        if count := len(wire.ArrayValues); count > 0 {
                v.ArrayValues = make([]QueryParameterValue, count)
                for i, token := range wire.ArrayValues {
                        v.ArrayValues[i] = decodeNested(token)
                }
        }
        if count := len(wire.StructValues); count > 0 {
                v.StructValues = make(map[string]QueryParameterValue, count)
                for key, token := range wire.StructValues {
                        v.StructValues[key] = decodeNested(token)
                }
        }
        return nil
}

// ValueJSON produces the string forwarded to the engine as
// `value_json`. A nil receiver yields "". Non-empty ArrayValues are
// rendered as a JSON array and non-empty StructValues as a JSON object
// keyed by field name; otherwise the normalized scalar `value` is
// returned unchanged. If marshaling fails the result is "[]" or "{}".
func (v *QueryParameterValue) ValueJSON() string {
        if v == nil {
                return ""
        }
        switch {
        case len(v.ArrayValues) > 0:
                items := make([]json.RawMessage, len(v.ArrayValues))
                for i := range v.ArrayValues {
                        items[i] = json.RawMessage(v.ArrayValues[i].marshalParameterJSON())
                }
                encoded, err := json.Marshal(items)
                if err != nil {
                        return "[]"
                }
                return string(encoded)
        case len(v.StructValues) > 0:
                members := make(map[string]json.RawMessage, len(v.StructValues))
                for key, member := range v.StructValues {
                        members[key] = json.RawMessage(member.marshalParameterJSON())
                }
                encoded, err := json.Marshal(members)
                if err != nil {
                        return "{}"
                }
                return string(encoded)
        }
        return v.Value
}

// ParameterTypeWire maps a REST query parameter type to the engine
// `type_kind` plus an optional `type_json` descriptor.
//
//   - nil yields two empty strings.
//   - STRUCT yields a comma-separated list of "name:kind" pairs, one
//     per entry of StructTypes (empty when there are none).
//   - ARRAY yields the element kind, or "STRUCT:<fields>" when the
//     element is itself a STRUCT (empty when ArrayType is nil).
//   - Any other type is returned as-is with an empty descriptor.
func ParameterTypeWire(t *QueryParameterType) (typeKind, typeJSON string) {
        if t == nil {
                return "", ""
        }
        if t.Type == parameterTypeStruct {
                if len(t.StructTypes) == 0 {
                        return parameterTypeStruct, ""
                }
                pairs := make([]string, len(t.StructTypes))
                for i, member := range t.StructTypes {
                        // Only the kind of each field is kept; its own
                        // descriptor is discarded.
                        kind, _ := ParameterTypeWire(&member.Type)
                        pairs[i] = member.Name + ":" + kind
                }
                return parameterTypeStruct, strings.Join(pairs, ",")
        }
        if t.Type == parameterTypeArray {
                if t.ArrayType == nil {
                        return parameterTypeArray, ""
                }
                kind, descriptor := ParameterTypeWire(t.ArrayType)
                if kind != parameterTypeStruct {
                        return parameterTypeArray, kind
                }
                return parameterTypeArray, parameterTypeStruct + ":" + descriptor
        }
        return t.Type, ""
}

// ParameterValueWire produces the JSON literal forwarded as engine
// `value_json`. When pt is a STRUCT with declared StructTypes and v
// carries StructValues, the fields are emitted as a positional JSON
// array in the order of `parameterType.structTypes`; every other case
// defers to v.ValueJSON. A nil v yields "".
func ParameterValueWire(pt *QueryParameterType, v *QueryParameterValue) string {
        if v == nil {
                return ""
        }
        positional := pt != nil && pt.Type == parameterTypeStruct &&
                len(pt.StructTypes) > 0 && len(v.StructValues) > 0
        if !positional {
                return v.ValueJSON()
        }
        ordered := make([]json.RawMessage, len(pt.StructTypes))
        for i, member := range pt.StructTypes {
                // A field missing from StructValues is the zero value here.
                ordered[i] = json.RawMessage(v.StructValues[member.Name].marshalParameterJSON())
        }
        encoded, err := json.Marshal(ordered)
        if err != nil {
                return "[]"
        }
        return string(encoded)
}

// marshalParameterJSON renders one parameter value as a JSON token for
// embedding in an ARRAY or STRUCT `value_json` payload.
//
//   - A value with ArrayValues or StructValues is marshaled as a whole.
//   - An empty scalar becomes null.
//   - A scalar whose text is itself valid JSON (number, bool, ...) is
//     re-encoded as that JSON value. Numbers are decoded as json.Number
//     so INT64 / NUMERIC literals keep every digit instead of being
//     rounded through float64.
//   - Any other scalar is emitted as a JSON string. json.Marshal is used
//     rather than strconv.Quote because Go string escapes such as \x01
//     or \a are not valid JSON.
func (v QueryParameterValue) marshalParameterJSON() []byte {
        if len(v.ArrayValues) > 0 || len(v.StructValues) > 0 {
                raw, err := json.Marshal(v)
                if err != nil {
                        return []byte("null")
                }
                return raw
        }
        if v.Value == "" {
                return []byte("null")
        }
        // json.Valid rejects trailing data, which Decoder.Decode alone
        // would silently leave unread.
        if json.Valid([]byte(v.Value)) {
                dec := json.NewDecoder(strings.NewReader(v.Value))
                dec.UseNumber()
                var decoded any
                if err := dec.Decode(&decoded); err == nil {
                        if raw, err := json.Marshal(decoded); err == nil {
                                return raw
                        }
                }
        }
        raw, err := json.Marshal(v.Value)
        if err != nil {
                return []byte("null")
        }
        return raw
}

// normalizeParameterScalarJSON converts a JSON scalar token into the
// decimal-string form the C++ parameter parser expects.
//
//   - Empty input and null yield "".
//   - A JSON string yields its unquoted contents.
//   - A bool yields "true" or "false".
//   - An integer that fits int64 is reformatted in base 10.
//   - An integer literal too large for int64 is returned verbatim, so
//     its digits are not rounded through float64.
//   - Any other number is formatted from its float64 value without an
//     exponent.
//   - Anything else (objects, arrays, malformed or out-of-range tokens)
//     is returned as the trimmed input text.
func normalizeParameterScalarJSON(raw json.RawMessage) string {
        trimmed := bytes.TrimSpace(raw)
        if len(trimmed) == 0 || bytes.Equal(trimmed, []byte("null")) {
                return ""
        }
        var asString string
        if err := json.Unmarshal(trimmed, &asString); err == nil {
                return asString
        }
        var asBool bool
        if err := json.Unmarshal(trimmed, &asBool); err == nil {
                return strconv.FormatBool(asBool)
        }
        var num json.Number
        if err := json.Unmarshal(trimmed, &num); err != nil {
                return string(trimmed)
        }
        if i, err := num.Int64(); err == nil {
                return strconv.FormatInt(i, 10)
        }
        literal := num.String()
        if !strings.ContainsAny(literal, ".eE") {
                // Pure integer literal that overflowed int64: already a
                // decimal string, so keep it exactly.
                return literal
        }
        if f, err := num.Float64(); err == nil {
                return strconv.FormatFloat(f, 'f', -1, 64)
        }
        return string(trimmed)
}

// ParseQueryParameters decodes a queryParameters JSON array. It is used
// by unit tests and by handlers that decode parameters outside the main
// QueryRequest body. Empty input yields (nil, nil); a decode failure is
// wrapped with the "parse queryParameters" prefix.
func ParseQueryParameters(data []byte) ([]QueryParameter, error) {
        if len(data) == 0 {
                return nil, nil
        }
        var decoded []QueryParameter
        err := json.Unmarshal(data, &decoded)
        if err != nil {
                return nil, fmt.Errorf("parse queryParameters: %w", err)
        }
        return decoded, nil
}

package bqtypes

import (
        "bytes"
        "encoding/json"
        "fmt"
        "strconv"
)

// RangePartitioning describes BigQuery integer-range partitioning. The
// only currently-supported `Range.Interval` granularity is integer
// buckets (`start`, `end`, `interval`); the field is just round-tripped
// for now.
type RangePartitioning struct {
        // Field is serialized as `field` and omitted when empty.
        Field string `json:"field,omitempty"`
        // Range holds the bucket bounds; a nil pointer is omitted.
        Range *RangePartSpec `json:"range,omitempty"`
}

// RangePartSpec is the `range` sub-object of RangePartitioning. All
// three integer fields are wire-serialized as decimal strings to mirror
// BigQuery REST. See docs/bigquery/docs/reference/rest/v2/tables/get.md.
//
// Each field is omitted from the JSON output when empty. On input the
// type's UnmarshalJSON also accepts JSON numbers for these fields.
type RangePartSpec struct {
        Start    string `json:"start,omitempty"`
        End      string `json:"end,omitempty"`
        Interval string `json:"interval,omitempty"`
}

// UnmarshalJSON decodes start, end and interval from either JSON
// strings or JSON numbers; numbers must be accepted because the Node
// client posts range.start as a number on tables.insert. A failure is
// wrapped with the name of the offending field.
func (r *RangePartSpec) UnmarshalJSON(data []byte) error {
        var wire struct {
                Start    json.RawMessage `json:"start,omitempty"`
                End      json.RawMessage `json:"end,omitempty"`
                Interval json.RawMessage `json:"interval,omitempty"`
        }
        if err := json.Unmarshal(data, &wire); err != nil {
                return err
        }

        var err error
        r.Start, err = unmarshalRangeIntString(wire.Start)
        if err != nil {
                return fmt.Errorf("start: %w", err)
        }
        r.End, err = unmarshalRangeIntString(wire.End)
        if err != nil {
                return fmt.Errorf("end: %w", err)
        }
        r.Interval, err = unmarshalRangeIntString(wire.Interval)
        if err != nil {
                return fmt.Errorf("interval: %w", err)
        }
        return nil
}

// unmarshalRangeIntString turns one range bound into its string form.
// Absent, blank and null tokens yield "". A quoted token yields its
// unquoted contents without further validation. Any other token must
// be a JSON number that fits int64 and is reformatted in base 10.
func unmarshalRangeIntString(raw json.RawMessage) (string, error) {
        token := bytes.TrimSpace(raw)
        switch {
        case raw == nil, len(token) == 0, bytes.Equal(token, []byte("null")):
                return "", nil
        case token[0] == '"':
                var text string
                if err := json.Unmarshal(token, &text); err != nil {
                        return "", err
                }
                return text, nil
        }
        var num json.Number
        if err := json.Unmarshal(token, &num); err != nil {
                return "", err
        }
        value, err := num.Int64()
        if err != nil {
                return "", err
        }
        return strconv.FormatInt(value, 10), nil
}

package bqtypes

import (
        "encoding/json"
        "fmt"
)

// RoutineLanguage is the routine language on the wire. Gapic v2 REST may
// send the enum as a string ("SQL") or as a numeric proto enum (1).
type RoutineLanguage string

// routineLanguageFromNumeric maps the numeric enum values 1 through 5
// to their string names; any other number reports false.
func routineLanguageFromNumeric(n int) (RoutineLanguage, bool) {
        names := [...]RoutineLanguage{
                1: "SQL",
                2: "JAVASCRIPT",
                3: "PYTHON",
                4: "JAVA",
                5: "SCALA",
        }
        if n < 1 || n >= len(names) {
                return "", false
        }
        return names[n], true
}

// UnmarshalJSON accepts string enum names or numeric gapic v2 values.
//
// JSON null resets the language to "". A string is stored as-is
// without validation. A number must be a whole number that
// routineLanguageFromNumeric recognizes; fractional values such as 1.9
// are rejected rather than silently truncated to a valid enum. Any
// other JSON type is an error.
func (l *RoutineLanguage) UnmarshalJSON(data []byte) error {
        if string(data) == jsonNullLiteral {
                *l = ""
                return nil
        }
        var raw any
        if err := json.Unmarshal(data, &raw); err != nil {
                return err
        }
        switch v := raw.(type) {
        case string:
                *l = RoutineLanguage(v)
                return nil
        case float64:
                n := int(v)
                // Converting back catches fractional and out-of-range inputs.
                if float64(n) != v {
                        return fmt.Errorf("bqtypes: language enum value must be an integer, got %v", v)
                }
                if lang, ok := routineLanguageFromNumeric(n); ok {
                        *l = lang
                        return nil
                }
                return fmt.Errorf("bqtypes: unknown language enum value %d", n)
        default:
                return fmt.Errorf("bqtypes: language must be string or number, got %T", raw)
        }
}

package bqtypes

import (
        "encoding/json"
        "fmt"
)

// RoutineReference is a stable handle to a routine (UDF / TVF / procedure).
// All three IDs are always serialized (no `omitempty`).
type RoutineReference struct {
        ProjectID string `json:"projectId"`
        DatasetID string `json:"datasetId"`
        RoutineID string `json:"routineId"`
}

// StandardSqlDataType mirrors the BigQuery REST StandardSqlDataType
// resource. See docs/bigquery/docs/reference/rest/v2/StandardSqlDataType.md.
//
// TypeKind is always serialized; the three nested descriptors are
// pointers and are omitted when nil.
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlDataType struct {
        TypeKind         SQLTypeKind            `json:"typeKind"`
        ArrayElementType *StandardSqlDataType   `json:"arrayElementType,omitempty"`
        StructType       *StandardSqlStructType `json:"structType,omitempty"`
        RangeElementType *StandardSqlDataType   `json:"rangeElementType,omitempty"`
}

// StandardSqlStructType is the struct sub-object of StandardSqlDataType.
// Fields is omitted from the JSON output when empty.
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlStructType struct {
        Fields []StandardSqlField `json:"fields,omitempty"`
}

// StandardSqlField is one field of a STRUCT type. It is also the
// element type of StandardSqlTableType.Columns. Both members are
// always serialized.
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlField struct {
        Name string              `json:"name"`
        Type StandardSqlDataType `json:"type"`
}

// StandardSqlTableType is a table type described by its columns; it is
// the type of Routine.ReturnTableType. Columns is omitted when empty.
//
//nolint:revive // wire name uses Sql, not SQL
type StandardSqlTableType struct {
        Columns []StandardSqlField `json:"columns,omitempty"`
}

// RoutineArgument is an input/output argument of a routine. Every
// field is optional on the wire and omitted when empty or nil.
type RoutineArgument struct {
        Name         string               `json:"name,omitempty"`
        ArgumentKind string               `json:"argumentKind,omitempty"`
        Mode         string               `json:"mode,omitempty"`
        DataType     *StandardSqlDataType `json:"dataType,omitempty"`
}

// RoutineType is the fine-grained routine kind on the wire. Gapic v2
// REST may send the enum as a string ("SCALAR_FUNCTION") or as a
// numeric proto enum (1 = SCALAR_FUNCTION, 2 = PROCEDURE, …).
type RoutineType string

// routineTypeFromNumeric maps a numeric proto enum value to its string
// name. It covers every named value of the BigQuery v2 RoutineType
// enum, including AGGREGATE_FUNCTION (4); 0 (unspecified) and unknown
// numbers report false.
func routineTypeFromNumeric(n int) (RoutineType, bool) {
        switch n {
        case 1:
                return "SCALAR_FUNCTION", true
        case 2:
                return "PROCEDURE", true
        case 3:
                return "TABLE_VALUED_FUNCTION", true
        case 4:
                return "AGGREGATE_FUNCTION", true
        default:
                return "", false
        }
}

// UnmarshalJSON accepts string enum names or numeric gapic v2 values.
//
// JSON null resets the routine type to "". A string is stored as-is
// without validation. A number must be a whole number that
// routineTypeFromNumeric recognizes; fractional values such as 2.7 are
// rejected rather than silently truncated to a valid enum. Any other
// JSON type is an error.
func (t *RoutineType) UnmarshalJSON(data []byte) error {
        if string(data) == jsonNullLiteral {
                *t = ""
                return nil
        }
        var raw any
        if err := json.Unmarshal(data, &raw); err != nil {
                return err
        }
        switch v := raw.(type) {
        case string:
                *t = RoutineType(v)
                return nil
        case float64:
                n := int(v)
                // Converting back catches fractional and out-of-range inputs.
                if float64(n) != v {
                        return fmt.Errorf("bqtypes: routineType enum value must be an integer, got %v", v)
                }
                if rt, ok := routineTypeFromNumeric(n); ok {
                        *t = rt
                        return nil
                }
                return fmt.Errorf("bqtypes: unknown routineType enum value %d", n)
        default:
                return fmt.Errorf("bqtypes: routineType must be string or number, got %T", raw)
        }
}

// Routine is the BigQuery Routine resource (subset).
// See docs/bigquery/docs/reference/rest/v2/routines.md.
//
// RoutineReference is always serialized; every other field is omitted
// from the JSON output when empty or nil. RoutineType and Language use
// custom decoders that accept either the enum name or its numeric
// value.
type Routine struct {
        Etag             string                `json:"etag,omitempty"`
        RoutineReference RoutineReference      `json:"routineReference"`
        RoutineType      RoutineType           `json:"routineType,omitempty"`
        CreationTime     string                `json:"creationTime,omitempty"`
        LastModifiedTime string                `json:"lastModifiedTime,omitempty"`
        Language         RoutineLanguage       `json:"language,omitempty"`
        Arguments        []RoutineArgument     `json:"arguments,omitempty"`
        ReturnType       *StandardSqlDataType  `json:"returnType,omitempty"`
        ReturnTableType  *StandardSqlTableType `json:"returnTableType,omitempty"`
        DefinitionBody   string                `json:"definitionBody,omitempty"`
        Description      string                `json:"description,omitempty"`
        // StrictMode is a pointer, so an unset value (nil) is omitted
        // while an explicit false is still serialized.
        StrictMode    *bool          `json:"strictMode,omitempty"`
        PythonOptions *PythonOptions `json:"pythonOptions,omitempty"`
}

// PythonOptions mirrors the BigQuery REST PythonOptions resource for
// LANGUAGE python routines. Both fields are omitted from the JSON
// output when empty.
type PythonOptions struct {
        EntryPoint string   `json:"entryPoint,omitempty"`
        Packages   []string `json:"packages,omitempty"`
}

package bqtypes

// ApplyDefaultCollationToStringFields returns a copy of schema in which
// every STRING (and STRING-like) field, at the top level and in nested
// fields, that has no `collation` of its own receives defaultCollation.
// The input schema is returned unchanged when it is nil, has no fields,
// or defaultCollation is empty.
func ApplyDefaultCollationToStringFields(schema *TableSchema, defaultCollation string) *TableSchema {
        if schema == nil || defaultCollation == "" || len(schema.Fields) == 0 {
                return schema
        }
        stamped := make([]TableFieldSchema, 0, len(schema.Fields))
        for _, field := range schema.Fields {
                stamped = append(stamped, applyDefaultCollationField(field, defaultCollation))
        }
        clone := *schema
        clone.Fields = stamped
        return &clone
}

// applyDefaultCollationField returns f with defaultCollation filled in
// when f is string-like and has no collation, and with the same rule
// applied recursively to a fresh copy of its nested fields.
func applyDefaultCollationField(f TableFieldSchema, defaultCollation string) TableFieldSchema {
        // f is a by-value copy, so it can be edited in place.
        if f.Collation == "" && isStringLikeFieldType(f.Type) {
                f.Collation = defaultCollation
        }
        if len(f.Fields) == 0 {
                return f
        }
        children := make([]TableFieldSchema, len(f.Fields))
        for i := range f.Fields {
                children[i] = applyDefaultCollationField(f.Fields[i], defaultCollation)
        }
        f.Fields = children
        return f
}

// isStringLikeFieldType reports whether t is one of the field types
// that receive a default collation: STRING, JSON or GEOGRAPHY. The
// comparison is case-sensitive.
func isStringLikeFieldType(t string) bool {
        return t == "STRING" || t == "JSON" || t == "GEOGRAPHY"
}

package bqtypes

// PolicyTagList is the policyTags sub-object on TableFieldSchema.
// Names is omitted from the JSON output when empty.
type PolicyTagList struct {
        Names []string `json:"names,omitempty"`
}

// ExtractSchemaPolicyOverlay builds the overlay schema the gateway
// metadata store keeps alongside engine-owned column types. Only fields
// that carry policyTags, a collation, a default value expression, or a
// nested field that does are copied. It returns nil when s is nil or
// nothing qualifies.
func ExtractSchemaPolicyOverlay(s *TableSchema) *TableSchema {
        if s != nil && len(s.Fields) > 0 {
                if kept := extractPolicyFields(s.Fields); len(kept) > 0 {
                        return &TableSchema{Fields: kept}
                }
        }
        return nil
}

// extractPolicyFields walks fields recursively and keeps a stripped
// copy (name, collation, policyTags, default value expression and kept
// nested fields) of each field that has at least one of those overlay
// attributes itself or in a descendant. The result is never nil.
func extractPolicyFields(fields []TableFieldSchema) []TableFieldSchema {
        kept := make([]TableFieldSchema, 0, len(fields))
        for i := range fields {
                field := fields[i]
                children := extractPolicyFields(field.Fields)
                tagged := field.PolicyTags != nil && len(field.PolicyTags.Names) > 0
                if !tagged && field.Collation == "" && field.DefaultValueExpression == "" && len(children) == 0 {
                        continue
                }
                kept = append(kept, TableFieldSchema{
                        Name:                   field.Name,
                        Collation:              field.Collation,
                        PolicyTags:             field.PolicyTags,
                        DefaultValueExpression: field.DefaultValueExpression,
                        Fields:                 children,
                })
        }
        return kept
}

// MergeSchemaPolicyTags lays the cached overlay (policyTags, collation
// and default value expressions) over the engine schema returned by
// tables.get. A nil base yields overlay; a nil or empty overlay yields
// base; otherwise a shallow copy of base with merged fields is
// returned.
func MergeSchemaPolicyTags(base, overlay *TableSchema) *TableSchema {
        switch {
        case base == nil:
                return overlay
        case overlay == nil || len(overlay.Fields) == 0:
                return base
        }
        result := *base
        result.Fields = mergeFieldPolicyTags(base.Fields, overlay.Fields)
        return &result
}

// mergeFieldPolicyTags merges overlay into a copy of base, matching
// fields by name. A matching overlay field contributes its policyTags
// (when non-nil), collation and default value expression (when
// non-empty), and its nested fields are merged recursively. Overlay
// fields with no counterpart in base are appended after the base
// fields. An empty base yields overlay itself.
func mergeFieldPolicyTags(base, overlay []TableFieldSchema) []TableFieldSchema {
        if len(base) == 0 {
                return overlay
        }
        overrides := make(map[string]TableFieldSchema, len(overlay))
        for _, ov := range overlay {
                // A later duplicate name replaces an earlier one.
                overrides[ov.Name] = ov
        }
        merged := make([]TableFieldSchema, len(base))
        copy(merged, base)
        for i := range merged {
                target := &merged[i]
                ov, found := overrides[target.Name]
                if !found {
                        continue
                }
                if ov.PolicyTags != nil {
                        target.PolicyTags = ov.PolicyTags
                }
                if ov.Collation != "" {
                        target.Collation = ov.Collation
                }
                if ov.DefaultValueExpression != "" {
                        target.DefaultValueExpression = ov.DefaultValueExpression
                }
                if len(target.Fields) > 0 || len(ov.Fields) > 0 {
                        target.Fields = mergeFieldPolicyTags(target.Fields, ov.Fields)
                }
        }
        for _, ov := range overlay {
                if _, exists := indexFieldByName(base, ov.Name); !exists {
                        merged = append(merged, ov)
                }
        }
        return merged
}

// indexFieldByName returns the first field in fields whose Name equals
// name, together with true. It returns the zero TableFieldSchema and
// false when there is no such field.
func indexFieldByName(fields []TableFieldSchema, name string) (TableFieldSchema, bool) {
        for i := range fields {
                if fields[i].Name != name {
                        continue
                }
                return fields[i], true
        }
        return TableFieldSchema{}, false
}

package bqtypes

import (
        "encoding/json"
        "fmt"
)

// SQLTypeKind is StandardSqlDataType.typeKind on the wire. Gapic v2 REST
// may send the enum as a string ("INT64") or as a numeric proto enum (2).
type SQLTypeKind string

// sqlTypeKindFromNumeric maps a numeric proto TypeKind value to its
// string name. The numbering is sparse and follows the BigQuery v2
// StandardSqlDataType.TypeKind enum, including RANGE (29), which pairs
// with StandardSqlDataType.RangeElementType. Unknown numbers, and 0
// (unspecified), report false.
func sqlTypeKindFromNumeric(n int) (SQLTypeKind, bool) {
        switch n {
        case 2:
                return "INT64", true
        case 5:
                return "BOOL", true
        case 7:
                return "FLOAT64", true
        case 8:
                return "STRING", true
        case 9:
                return "BYTES", true
        case 10:
                return "DATE", true
        case 16:
                return "ARRAY", true
        case 17:
                return "STRUCT", true
        case 19:
                return "TIMESTAMP", true
        case 20:
                return "TIME", true
        case 21:
                return "DATETIME", true
        case 22:
                return "GEOGRAPHY", true
        case 23:
                return "NUMERIC", true
        case 24:
                return "BIGNUMERIC", true
        case 25:
                return "JSON", true
        case 26:
                return "INTERVAL", true
        case 29:
                return "RANGE", true
        default:
                return "", false
        }
}

// UnmarshalJSON accepts string enum names or numeric gapic v2 values.
//
// JSON null resets the type kind to "". A string is stored as-is
// without validation. A number must be a whole number that
// sqlTypeKindFromNumeric recognizes; fractional values such as 2.5 are
// rejected rather than silently truncated to a valid enum. Any other
// JSON type is an error.
func (t *SQLTypeKind) UnmarshalJSON(data []byte) error {
        if string(data) == jsonNullLiteral {
                *t = ""
                return nil
        }
        var raw any
        if err := json.Unmarshal(data, &raw); err != nil {
                return err
        }
        switch v := raw.(type) {
        case string:
                *t = SQLTypeKind(v)
                return nil
        case float64:
                n := int(v)
                // Converting back catches fractional and out-of-range inputs.
                if float64(n) != v {
                        return fmt.Errorf("bqtypes: typeKind enum value must be an integer, got %v", v)
                }
                if tk, ok := sqlTypeKindFromNumeric(n); ok {
                        *t = tk
                        return nil
                }
                return fmt.Errorf("bqtypes: unknown typeKind enum value %d", n)
        default:
                return fmt.Errorf("bqtypes: typeKind must be string or number, got %T", raw)
        }
}

// Package bqtypes contains wire-compatible Go structs for the small slice
// of the BigQuery v2 REST API the emulator currently understands.
//
// We do not re-generate these from the official Discovery doc yet; this
// hand-written subset is enough to compile and exercise the route table.
// As we flesh out handlers, types here can be replaced by generated code
// (e.g. via `google.golang.org/api/bigquery/v2`'s generated structs) or
// expanded inline.
package bqtypes

import (
        "encoding/json"
        "fmt"
        "strconv"
)

// labelsWireState is populated by Dataset/Table UnmarshalJSON when the body
// carries an explicit labels field (including label-delete null values).
type labelsWireState struct {
        // present is true when the decoded body contained a labels field.
        present bool
        // delete lists the label keys the body cleared via JSON null.
        delete []string
}

// collationWireState records whether a decoded request body carried a
// defaultCollation field; Dataset.UnmarshalJSON sets present when it did.
type collationWireState struct {
        present bool
}

// DatasetReference is a stable handle to a dataset.
// Both IDs are always serialized (no `omitempty`).
type DatasetReference struct {
        ProjectID string `json:"projectId"`
        DatasetID string `json:"datasetId"`
}

// TableReference is a stable handle to a table.
// All three IDs are always serialized (no `omitempty`).
type TableReference struct {
        ProjectID string `json:"projectId"`
        DatasetID string `json:"datasetId"`
        TableID   string `json:"tableId"`
}

// JobReference is a stable handle to a job.
// ProjectID and JobID are always serialized; Location is omitted from
// the JSON output when empty.
type JobReference struct {
        ProjectID string `json:"projectId"`
        JobID     string `json:"jobId"`
        Location  string `json:"location,omitempty"`
}

// Dataset is the BigQuery Dataset resource (subset).
//
// Access is the dataset ACL — a list of role bindings. The field is
// always serialized (no `omitempty`) because the Java BigQuery client
// calls `new ArrayList<>(dataset.getAcl())` on the deserialized
// response, which NPEs when the field is null. Live BigQuery returns
// an empty array for newly-created datasets; the emulator must
// preserve that shape so AuthorizeDatasetIT-style ACL-mutation flows
// work end-to-end. See the failing-IT inventory in
// `docs/ENGINE_POLICY.md`.
//
// Labels is always serialized (no `omitempty`) for the same reason:
// the Node `getDatasetLabels` sample (and several upstream Python
// snippets) call `Object.entries(dataset.metadata.labels)` /
// `dict(dataset.labels)` on the deserialized response, which raises
// `TypeError: Cannot convert undefined or null to object` /
// `TypeError: argument of type 'NoneType' is not iterable` when the
// field is missing. Live BigQuery returns `labels: {}` for a newly
// created dataset; the resource builder defaults a nil map to `{}` to
// match. Same for Table.Labels below.
//
// The trailing unexported fields and DefaultCollationSet are excluded
// from JSON (`json:"-"`); they carry request/response bookkeeping used
// by the custom UnmarshalJSON and MarshalJSON methods.
type Dataset struct {
        Kind                     string           `json:"kind,omitempty"` // bigquery#dataset
        ID                       string           `json:"id,omitempty"`
        DatasetReference         DatasetReference `json:"datasetReference"`
        FriendlyName             string           `json:"friendlyName,omitempty"`
        Description              string           `json:"description,omitempty"`
        Location                 string           `json:"location,omitempty"`
        Etag                     string           `json:"etag,omitempty"`
        CreationTime             string           `json:"creationTime,omitempty"`
        LastModifiedTime         string           `json:"lastModifiedTime,omitempty"`
        Access                   []map[string]any `json:"access"`
        Labels                   ResourceLabels   `json:"labels"`
        DefaultTableExpirationMs string           `json:"defaultTableExpirationMs,omitempty"`
        // DefaultPartitionExpirationMs is inherited by new time-partitioned
        // tables in the dataset. See
        // docs/bigquery/docs/reference/rest/v2/datasets/get.md.
        DefaultPartitionExpirationMs string `json:"defaultPartitionExpirationMs,omitempty"`
        // DefaultCollation is BigQuery's per-dataset default text
        // collation (typically `und:ci` for the case-insensitive lane
        // the upstream node sample exercises). The emulator does not
        // honor it at query time today, but the value still has to
        // round-trip through GET/PATCH so client libraries observe the
        // shape they expect. See
        // docs/bigquery/docs/reference/rest/v2/datasets/get.md.
        DefaultCollation string `json:"defaultCollation,omitempty"`
        // DefaultRoundingMode is inherited by new NUMERIC/BIGNUMERIC columns in
        // tables created in this dataset. Round-trips via the gateway overlay.
        DefaultRoundingMode string `json:"defaultRoundingMode,omitempty"`
        // MaxTimeTravelHours is the dataset time-travel window (48–168 hours).
        MaxTimeTravelHours string `json:"maxTimeTravelHours,omitempty"`
        // IsCaseInsensitive marks dataset/table name lookups as case-insensitive.
        IsCaseInsensitive *bool `json:"isCaseInsensitive,omitempty"`
        // ResourceTags are GCP resource manager tags attached to the dataset.
        ResourceTags map[string]string `json:"resourceTags,omitempty"`
        // Replicas echoes cross-region replica references supplied on write;
        // the emulator does not model active replication.
        Replicas []TableReference `json:"replicas,omitempty"`
        // ExternalDatasetReference marks a Spanner / Cloud SQL external dataset.
        ExternalDatasetReference *ExternalDatasetReference `json:"externalDatasetReference,omitempty"`

        // labelsWire is filled by UnmarshalJSON when the body carried a
        // labels field; read via LabelsPatchPresent and LabelsToDelete.
        labelsWire labelsWireState `json:"-"`
        // defaultCollationWire.present is set by UnmarshalJSON when the
        // body carried defaultCollation; read via DefaultCollationPresent.
        defaultCollationWire collationWireState `json:"-"`
        // DefaultCollationSet is set by UnmarshalJSON alongside
        // defaultCollationWire; when true, MarshalJSON injects the
        // defaultCollation field into the encoded output.
        DefaultCollationSet bool `json:"-"`
        // omitEmptyLabelsOnWire is set via SetOmitEmptyLabelsOnWire and
        // makes MarshalJSON drop the labels field when Labels is empty.
        omitEmptyLabelsOnWire bool `json:"-"`
}

// LabelsPatchPresent reports whether the request body this Dataset was
// decoded from carried an explicit labels field.
func (d Dataset) LabelsPatchPresent() bool {
        state := d.labelsWire
        return state.present
}

// LabelsToDelete returns the label keys that the decoded request body
// cleared by setting them to JSON null.
func (d Dataset) LabelsToDelete() []string {
        state := d.labelsWire
        return state.delete
}

// DefaultCollationPresent reports whether the decoded request body
// carried a defaultCollation field at all, which includes an empty
// string sent to clear the collation.
func (d Dataset) DefaultCollationPresent() bool {
        state := d.defaultCollationWire
        return state.present
}

// SetOmitEmptyLabelsOnWire controls whether MarshalJSON leaves out the
// labels field when Labels is empty. PATCH responses enable it so the
// Node deleteLabel* samples log `undefined` for apiResponse.labels.
func (d *Dataset) SetOmitEmptyLabelsOnWire(v bool) {
        d.omitEmptyLabelsOnWire = v
}

// UnmarshalJSON decodes a Dataset while capturing two fields in raw
// form: labels, whose values may be JSON null (label delete) as well as
// the usual strings client libraries send on datasets.patch, and
// defaultCollation, whose mere presence in the body is recorded.
func (d *Dataset) UnmarshalJSON(data []byte) error {
        // alias has Dataset's fields but not its methods, so decoding
        // into it does not re-enter this method.
        type alias Dataset
        var wire struct {
                alias
                Labels           json.RawMessage `json:"labels,omitempty"`
                DefaultCollation json.RawMessage `json:"defaultCollation,omitempty"`
        }
        if err := json.Unmarshal(data, &wire); err != nil {
                return err
        }
        *d = Dataset(wire.alias)

        labels, err := parseLabelsJSON(wire.Labels)
        if err != nil {
                return err
        }
        if labels.present {
                d.Labels = ResourceLabels(labels.values)
                d.labelsWire = labelsWireState{present: true, delete: labels.delete}
        }

        if wire.DefaultCollation == nil {
                return nil
        }
        d.defaultCollationWire.present = true
        d.DefaultCollationSet = true
        if err := json.Unmarshal(wire.DefaultCollation, &d.DefaultCollation); err != nil {
                return fmt.Errorf("defaultCollation: %w", err)
        }
        return nil
}

// MarshalJSON encodes the dataset. Labels are normally written even when
// empty; they are dropped only when omitEmptyLabelsOnWire is set and the
// map is empty (the label-delete PATCH response, so Node clients surface
// apiResponse.labels as undefined). When DefaultCollationSet is true the
// defaultCollation field is written unconditionally, empty string included.
func (d Dataset) MarshalJSON() ([]byte, error) {
        // plain has no methods, so encoding it cannot recurse into MarshalJSON.
        type plain Dataset
        var (
                encoded []byte
                err     error
        )
        switch {
        case d.omitEmptyLabelsOnWire && len(d.Labels) == 0:
                encoded, err = marshalWithoutJSONField(plain(d), "labels")
        default:
                encoded, err = json.Marshal(plain(d))
        }
        if err != nil {
                return encoded, err
        }
        if !d.DefaultCollationSet {
                return encoded, nil
        }
        return injectJSONStringField(encoded, "defaultCollation", d.DefaultCollation)
}

// Table is the BigQuery Table resource (subset).
//
// Labels is always serialized (no `omitempty`); see the matching note
// on Dataset.Labels. tableResource defaults a nil map to `{}` so the
// upstream `getTableLabels` sample's `Object.entries(table.metadata.labels)`
// returns an empty iterator instead of erroring.
//
// Table has custom UnmarshalJSON and MarshalJSON methods. UnmarshalJSON
// records whether the request body mentioned labels and defaultCollation
// in the bookkeeping fields at the end of the struct; MarshalJSON consults
// omitEmptyLabelsOnWire and DefaultCollationSet when producing output.
type Table struct {
        Kind           string         `json:"kind,omitempty"` // bigquery#table
        ID             string         `json:"id,omitempty"`
        TableReference TableReference `json:"tableReference"`
        FriendlyName   string         `json:"friendlyName,omitempty"`
        Description    string         `json:"description,omitempty"`
        Schema         *TableSchema   `json:"schema,omitempty"`
        Type           string         `json:"type,omitempty"` // TABLE | VIEW | MATERIALIZED_VIEW | EXTERNAL
        NumRows        string         `json:"numRows,omitempty"`
        NumBytes       string         `json:"numBytes,omitempty"`
        // Output-only storage breakdown fields. The gateway stubs these to "0"
        // until the engine exposes byte accounting RPCs.
        NumLongTermBytes           string         `json:"numLongTermBytes,omitempty"`
        NumActiveLogicalBytes      string         `json:"numActiveLogicalBytes,omitempty"`
        NumTotalLogicalBytes       string         `json:"numTotalLogicalBytes,omitempty"`
        NumCurrentPhysicalBytes    string         `json:"numCurrentPhysicalBytes,omitempty"`
        NumPhysicalBytes           string         `json:"numPhysicalBytes,omitempty"`
        NumActivePhysicalBytes     string         `json:"numActivePhysicalBytes,omitempty"`
        NumLongTermPhysicalBytes   string         `json:"numLongTermPhysicalBytes,omitempty"`
        NumTimeTravelPhysicalBytes string         `json:"numTimeTravelPhysicalBytes,omitempty"`
        CreationTime               string         `json:"creationTime,omitempty"`
        LastModifiedTime           string         `json:"lastModifiedTime,omitempty"`
        Etag                       string         `json:"etag,omitempty"`
        Labels                     ResourceLabels `json:"labels"`
        // ExpirationTime is the wall-clock time at which the table
        // expires, encoded as a decimal string of milliseconds since
        // epoch -- BigQuery REST always serializes int64 timestamps
        // as strings to dodge JavaScript's 53-bit integer ceiling.
        // `omitempty` is intentional: live BigQuery omits the field
        // when the table has no expiration.
        ExpirationTime MillisTimestamp `json:"expirationTime,omitempty"`
        // RangePartitioning is the integer-range partitioning spec
        // (`{field, range:{start,end,interval}}`) the upstream node
        // `createTableRangePartitioned` sample sets and the matching
        // test asserts on the GET response.
        RangePartitioning *RangePartitioning `json:"rangePartitioning,omitempty"`
        // TimePartitioning is the (TIME / DAY / HOUR / MONTH / YEAR)
        // time-based partitioning spec. Not exercised by every test
        // but parallel to RangePartitioning so the roundtrip helper
        // can carry it without dropping the field on the floor.
        TimePartitioning *TimePartitioning `json:"timePartitioning,omitempty"`
        // Clustering is the per-table clustering spec the upstream
        // node `createTableClustered` sample sets via
        // `{ fields: ['city', 'zipcode'] }`.
        Clustering *Clustering `json:"clustering,omitempty"`
        // DefaultCollation is the table-level default text collation
        // (typically `und:ci`). Mirrors Dataset.DefaultCollation;
        // see that field's comment for the round-trip rationale.
        DefaultCollation string `json:"defaultCollation,omitempty"`
        // Location is the BigQuery region for the table (inherited from
        // its dataset on live BigQuery). Client libraries such as
        // google-cloud-bigquery and BigFrames read this on tables.get.
        Location string `json:"location,omitempty"`
        // RequirePartitionFilter mirrors the table-level partition-filter
        // requirement BigQuery REST exposes. Pointer semantics let PATCH
        // bodies set `false` explicitly without conflating unset and false.
        RequirePartitionFilter *bool `json:"requirePartitionFilter,omitempty"`
        // View holds the view definition when Type is VIEW.
        View *ViewDefinition `json:"view,omitempty"`
        // MaterializedView holds the MV definition when Type is
        // MATERIALIZED_VIEW. The query is analyzed at insert time to
        // infer the catalog schema when the client omits an explicit
        // TableSchema (see QueryMaterializedViewIT).
        MaterializedView *MaterializedViewDefinition `json:"materializedView,omitempty"`
        // ExternalDataConfiguration describes a table backed by data
        // outside the emulator catalog (GCS CSV/JSON/Parquet, ...).
        // Persisted in the gateway MetadataStore and materialized into
        // the engine catalog at insert/query time for supported formats.
        ExternalDataConfiguration *ExternalDataConfiguration `json:"externalDataConfiguration,omitempty"`
        // EncryptionConfiguration stores the opaque CMEK kmsKeyName the
        // client supplied on create/load/update. Not enforced by the emulator.
        EncryptionConfiguration *EncryptionConfiguration `json:"encryptionConfiguration,omitempty"`
        // DefaultRoundingMode is inherited by new NUMERIC/BIGNUMERIC columns.
        DefaultRoundingMode string `json:"defaultRoundingMode,omitempty"`
        // CaseInsensitive marks table name lookups as case-insensitive within
        // a case-insensitive dataset.
        CaseInsensitive *bool `json:"caseInsensitive,omitempty"`
        // ResourceTags are GCP resource manager tags attached to the table.
        ResourceTags map[string]string `json:"resourceTags,omitempty"`
        // TableConstraints carries primary/foreign key metadata (not enforced).
        TableConstraints *TableConstraints `json:"tableConstraints,omitempty"`
        // Replicas echoes cross-region replica references on write.
        Replicas []TableReference `json:"replicas,omitempty"`
        // BiglakeConfiguration marks a BigLake-managed table (unsupported).
        BiglakeConfiguration *BiglakeConfiguration `json:"biglakeConfiguration,omitempty"`
        // ObjectTableOptions marks an object table (unsupported).
        ObjectTableOptions *ObjectTableOptions `json:"objectTableOptions,omitempty"`

        // Bookkeeping that never reaches the wire (`json:"-"`):
        //   - labelsWire: filled by UnmarshalJSON when the body had a labels
        //     field; also holds the keys whose value was JSON null.
        //   - defaultCollationWire: its present flag is set by UnmarshalJSON
        //     when the body had a defaultCollation field.
        //   - DefaultCollationSet: when true, MarshalJSON writes
        //     defaultCollation unconditionally, empty string included.
        //   - omitEmptyLabelsOnWire: when true, MarshalJSON leaves labels out
        //     of the output if the map is empty.
        labelsWire            labelsWireState    `json:"-"`
        defaultCollationWire  collationWireState `json:"-"`
        DefaultCollationSet   bool               `json:"-"`
        omitEmptyLabelsOnWire bool               `json:"-"`
}

// LabelsPatchPresent tells whether the decoded request body carried an
// explicit labels field.
func (t Table) LabelsPatchPresent() bool {
        wire := t.labelsWire
        return wire.present
}

// LabelsToDelete lists the label keys whose value was JSON null in the
// decoded request body, i.e. the keys the caller asked to remove.
func (t Table) LabelsToDelete() []string {
        wire := t.labelsWire
        return wire.delete
}

// DefaultCollationPresent tells whether the decoded request body carried an
// explicit defaultCollation field. An empty string (sent to clear the
// collation) still counts as present.
func (t Table) DefaultCollationPresent() bool {
        wire := t.defaultCollationWire
        return wire.present
}

// SetOmitEmptyLabelsOnWire toggles how MarshalJSON treats an empty labels
// map: with v == true the labels field is left out of the encoded object
// instead of being written as `{}`. PATCH responses use this so the Node
// deleteLabel* samples see `undefined` for apiResponse.labels.
func (t *Table) SetOmitEmptyLabelsOnWire(v bool) {
        t.omitEmptyLabelsOnWire = v
}

// UnmarshalJSON decodes a Table request body. expirationTime may arrive as
// a decimal string or a JSON number, and a labels value may be JSON null,
// which marks that key for deletion. Whether labels and defaultCollation
// appeared in the body at all is remembered on the receiver.
func (t *Table) UnmarshalJSON(data []byte) error {
        // plain shares Table's fields but not its methods, so decoding it
        // does not re-enter this UnmarshalJSON.
        type plain Table
        // The RawMessage fields shadow the promoted fields of the same JSON
        // name, letting us inspect the raw bytes ourselves.
        var wire struct {
                plain
                ExpirationTime   json.RawMessage `json:"expirationTime,omitempty"`
                Labels           json.RawMessage `json:"labels,omitempty"`
                DefaultCollation json.RawMessage `json:"defaultCollation,omitempty"`
        }
        if err := json.Unmarshal(data, &wire); err != nil {
                return err
        }
        *t = Table(wire.plain)

        if wire.ExpirationTime != nil {
                var millis MillisTimestamp
                if err := json.Unmarshal(wire.ExpirationTime, &millis); err != nil {
                        return fmt.Errorf("expirationTime: %w", err)
                }
                t.ExpirationTime = millis
        }

        labels, err := parseLabelsJSON(wire.Labels)
        if err != nil {
                return err
        }
        if labels.present {
                t.Labels = ResourceLabels(labels.values)
                t.labelsWire = labelsWireState{present: true, delete: labels.delete}
        }

        if wire.DefaultCollation == nil {
                return nil
        }
        t.defaultCollationWire.present = true
        t.DefaultCollationSet = true
        if err := json.Unmarshal(wire.DefaultCollation, &t.DefaultCollation); err != nil {
                return fmt.Errorf("defaultCollation: %w", err)
        }
        return nil
}

// MarshalJSON encodes the table. Labels are normally written even when
// empty; they are dropped only when omitEmptyLabelsOnWire is set and the
// map is empty (the label-delete PATCH response, so Node clients surface
// apiResponse.labels as undefined). When DefaultCollationSet is true the
// defaultCollation field is written unconditionally, empty string included.
func (t Table) MarshalJSON() ([]byte, error) {
        // plain has no methods, so encoding it cannot recurse into MarshalJSON.
        type plain Table
        var (
                encoded []byte
                err     error
        )
        switch {
        case t.omitEmptyLabelsOnWire && len(t.Labels) == 0:
                encoded, err = marshalWithoutJSONField(plain(t), "labels")
        default:
                encoded, err = json.Marshal(plain(t))
        }
        if err != nil {
                return encoded, err
        }
        if !t.DefaultCollationSet {
                return encoded, nil
        }
        return injectJSONStringField(encoded, "defaultCollation", t.DefaultCollation)
}

// ExternalDataConfiguration mirrors the BigQuery REST external data
// source object. See docs/bigquery/docs/reference/rest/v2/tables.md.
//
// Every field carries `omitempty`, so zero values (false, 0, "", nil) are
// left out when encoding. CsvOptions and GoogleSheetsOptions decode through
// their own UnmarshalJSON methods, which accept skipLeadingRows as either a
// JSON number or a decimal string.
type ExternalDataConfiguration struct {
        SourceURIs              []string                 `json:"sourceUris,omitempty"`
        SourceFormat            string                   `json:"sourceFormat,omitempty"`
        Autodetect              bool                     `json:"autodetect,omitempty"`
        Schema                  *TableSchema             `json:"schema,omitempty"`
        CsvOptions              *CsvOptions              `json:"csvOptions,omitempty"`
        GoogleSheetsOptions     *GoogleSheetsOptions     `json:"googleSheetsOptions,omitempty"`
        HivePartitioningOptions *HivePartitioningOptions `json:"hivePartitioningOptions,omitempty"`
        IgnoreUnknownValues     bool                     `json:"ignoreUnknownValues,omitempty"`
        MaxBadRecords           int                      `json:"maxBadRecords,omitempty"`
        Compression             string                   `json:"compression,omitempty"`
}

// HivePartitioningOptions mirrors the BigQuery REST hivePartitioningOptions
// object. See docs/bigquery/docs/reference/rest/v2/tables.md.
//
// RequirePartitionFilter is a plain bool with `omitempty`, so an explicit
// false cannot be told apart from an absent field and is never emitted
// (unlike Table.RequirePartitionFilter, which is a *bool).
type HivePartitioningOptions struct {
        Mode                   string   `json:"mode,omitempty"`
        SourceURIPrefix        string   `json:"sourceUriPrefix,omitempty"`
        RequirePartitionFilter bool     `json:"requirePartitionFilter,omitempty"`
        Fields                 []string `json:"fields,omitempty"`
}

// CsvOptions is the csvOptions sub-object of ExternalDataConfiguration.
//
// skipLeadingRows is stored in an unexported field that UnmarshalJSON
// fills in; read it through SkipLeadingRows.
// NOTE(review): because the field is unexported and no MarshalJSON is
// defined alongside it here, skipLeadingRows is not written back when the
// struct is re-encoded -- confirm that is intended.
type CsvOptions struct {
        FieldDelimiter      string `json:"fieldDelimiter,omitempty"`
        Quote               string `json:"quote,omitempty"`
        Encoding            string `json:"encoding,omitempty"`
        AllowJaggedRows     bool   `json:"allowJaggedRows,omitempty"`
        AllowQuotedNewlines bool   `json:"allowQuotedNewlines,omitempty"`
        skipLeadingRows     int
}

// SkipLeadingRows returns the number of leading CSV rows to skip. It is
// safe to call on a nil receiver, which reports 0.
func (o *CsvOptions) SkipLeadingRows() int {
        if o == nil {
                return 0
        }
        return o.skipLeadingRows
}

// UnmarshalJSON decodes csvOptions, accepting skipLeadingRows as either a
// JSON number or a decimal string. An absent or null skipLeadingRows
// leaves the count at 0.
//
// A numeric skipLeadingRows must be a whole number; values with a
// fractional part or outside the range a float64 represents exactly are
// rejected instead of being silently truncated. Any other JSON type
// (bool, object, array) is an error as well.
func (o *CsvOptions) UnmarshalJSON(data []byte) error {
        // Integers beyond 2^53 are not exactly representable in float64, and
        // converting an out-of-range float to an integer has an
        // implementation-dependent result, so bound the value first.
        const maxExactFloatInt = 1 << 53

        // alias has CsvOptions' fields but none of its methods, so decoding
        // it does not re-enter this UnmarshalJSON.
        type alias CsvOptions
        var raw struct {
                alias
                SkipLeadingRows any `json:"skipLeadingRows,omitempty"`
        }
        if err := json.Unmarshal(data, &raw); err != nil {
                return err
        }
        *o = CsvOptions(raw.alias)
        if raw.SkipLeadingRows == nil {
                return nil
        }
        switch v := raw.SkipLeadingRows.(type) {
        case float64:
                if v < -maxExactFloatInt || v > maxExactFloatInt || v != float64(int64(v)) {
                        return fmt.Errorf("csvOptions.skipLeadingRows: %v is not a valid integer", v)
                }
                o.skipLeadingRows = int(v)
        case string:
                n, err := strconv.Atoi(v)
                if err != nil {
                        return fmt.Errorf("csvOptions.skipLeadingRows: %w", err)
                }
                o.skipLeadingRows = n
        default:
                return fmt.Errorf("csvOptions.skipLeadingRows: unsupported type %T", v)
        }
        return nil
}

// GoogleSheetsOptions is the googleSheetsOptions sub-object.
//
// skipLeadingRows is stored in an unexported field that UnmarshalJSON
// fills in; read it through SkipLeadingRows.
// NOTE(review): because the field is unexported and no MarshalJSON is
// defined alongside it here, skipLeadingRows is not written back when the
// struct is re-encoded -- confirm that is intended.
type GoogleSheetsOptions struct {
        Range           string `json:"range,omitempty"`
        skipLeadingRows int
}

// SkipLeadingRows returns the number of leading sheet rows to skip. It is
// safe to call on a nil receiver, which reports 0.
func (o *GoogleSheetsOptions) SkipLeadingRows() int {
        if o == nil {
                return 0
        }
        return o.skipLeadingRows
}

// UnmarshalJSON decodes googleSheetsOptions, accepting skipLeadingRows as
// either a JSON number or a decimal string. An absent or null
// skipLeadingRows leaves the count at 0.
//
// A numeric skipLeadingRows must be a whole number; values with a
// fractional part or outside the range a float64 represents exactly are
// rejected instead of being silently truncated. Any other JSON type
// (bool, object, array) is an error as well.
func (o *GoogleSheetsOptions) UnmarshalJSON(data []byte) error {
        // Integers beyond 2^53 are not exactly representable in float64, and
        // converting an out-of-range float to an integer has an
        // implementation-dependent result, so bound the value first.
        const maxExactFloatInt = 1 << 53

        // alias has the struct's fields but none of its methods, so decoding
        // it does not re-enter this UnmarshalJSON.
        type alias GoogleSheetsOptions
        var raw struct {
                alias
                SkipLeadingRows any `json:"skipLeadingRows,omitempty"`
        }
        if err := json.Unmarshal(data, &raw); err != nil {
                return err
        }
        *o = GoogleSheetsOptions(raw.alias)
        if raw.SkipLeadingRows == nil {
                return nil
        }
        switch v := raw.SkipLeadingRows.(type) {
        case float64:
                if v < -maxExactFloatInt || v > maxExactFloatInt || v != float64(int64(v)) {
                        return fmt.Errorf("googleSheetsOptions.skipLeadingRows: %v is not a valid integer", v)
                }
                o.skipLeadingRows = int(v)
        case string:
                n, err := strconv.Atoi(v)
                if err != nil {
                        return fmt.Errorf("googleSheetsOptions.skipLeadingRows: %w", err)
                }
                o.skipLeadingRows = n
        default:
                return fmt.Errorf("googleSheetsOptions.skipLeadingRows: unsupported type %T", v)
        }
        return nil
}

// TableConstraints mirrors the BigQuery REST tableConstraints object.
//
// Only primaryKey is declared on this type.
// NOTE(review): the comment on Table.TableConstraints mentions foreign
// keys, but there is no field for them here, so this type cannot retain
// them -- confirm whether that is intended.
type TableConstraints struct {
        PrimaryKey *PrimaryKey `json:"primaryKey,omitempty"`
}

// PrimaryKey is the primaryKey sub-object of TableConstraints. Columns is
// written as `columns` and left out of the JSON when empty.
type PrimaryKey struct {
        Columns []string `json:"columns,omitempty"`
}

// ViewDefinition is the BigQuery REST view sub-object. See
// docs/bigquery/docs/reference/rest/v2/tables#ViewDefinition.
//
// UseLegacySQL carries `omitempty`, so a false value is never written to
// the wire.
type ViewDefinition struct {
        Query        string `json:"query,omitempty"`
        UseLegacySQL bool   `json:"useLegacySql,omitempty"`
}

// MaterializedViewDefinition is the BigQuery REST materializedView
// sub-object. See docs/bigquery/docs/reference/rest/v2/tables#MaterializedViewDefinition.
//
// Only the defining query is modeled here.
type MaterializedViewDefinition struct {
        Query string `json:"query,omitempty"`
}

// TimePartitioning describes time-based partitioning. Carried for
// roundtrip only; the emulator does not enforce partition expiration.
//
// ExpirationMs is held as a string, so whatever text the client sent is
// stored and echoed without numeric parsing by this type.
type TimePartitioning struct {
        Type         string `json:"type,omitempty"`
        Field        string `json:"field,omitempty"`
        ExpirationMs string `json:"expirationMs,omitempty"`
}

// Clustering is the per-table clustering spec. Fields is written as
// `fields` and left out of the JSON when empty.
type Clustering struct {
        Fields []string `json:"fields,omitempty"`
}

// TableSchema is the BigQuery TableSchema resource: an ordered list of
// top-level columns. Fields is left out of the JSON when empty.
type TableSchema struct {
        Fields []TableFieldSchema `json:"fields,omitempty"`
}

// TableFieldSchema is one column in a TableSchema.
//
// Name and Type have no `omitempty` and are therefore always written, even
// when empty; every other field is dropped from the JSON at its zero
// value. The type is recursive: Fields holds the member columns of a
// STRUCT/RECORD column.
type TableFieldSchema struct {
        Name                   string         `json:"name"`
        Type                   string         `json:"type"`           // STRING, INT64, FLOAT64, BOOL, TIMESTAMP, ...
        Mode                   string         `json:"mode,omitempty"` // NULLABLE, REQUIRED, REPEATED
        Description            string         `json:"description,omitempty"`
        DefaultValueExpression string         `json:"defaultValueExpression,omitempty"`
        Collation              string         `json:"collation,omitempty"`
        PolicyTags             *PolicyTagList `json:"policyTags,omitempty"`
        // MaskKind is an emulator extension for column-level data masking
        // (NULLIFY | SHA256 | DEFAULT_VALUE | DENIED). BigQuery clients
        // ignore unknown JSON fields; the gateway persists this via
        // SetColumnGovernance on tables.insert/patch/update.
        MaskKind string             `json:"maskKind,omitempty"`
        Fields   []TableFieldSchema `json:"fields,omitempty"` // for STRUCT/RECORD
}

// ExternalDatasetReference links a dataset to an external Spanner / Cloud SQL source.
// Both fields are plain strings and are left out of the JSON when empty.
type ExternalDatasetReference struct {
        Connection string `json:"connection,omitempty"`
        Source     string `json:"source,omitempty"`
}

// BiglakeConfiguration marks a BigLake-managed table. All four fields are
// plain strings that are left out of the JSON when empty.
type BiglakeConfiguration struct {
        ConnectionID string `json:"connectionId,omitempty"`
        StorageURI   string `json:"storageUri,omitempty"`
        FileFormat   string `json:"fileFormat,omitempty"`
        TableFormat  string `json:"tableFormat,omitempty"`
}

// ObjectTableOptions marks an object table over GCS object metadata.
// SourceURIs is written as `sourceUris` and left out of the JSON when empty.
type ObjectTableOptions struct {
        SourceURIs []string `json:"sourceUris,omitempty"`
}

package bqtypes

import (
        "errors"
        "strconv"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// ValueToCell rewraps a `bigquery_emulator.v1.Cell` received over the
// engine gRPC contract as the BigQuery REST `f`/`v` cell that
// `jobs.query`, `jobs.getQueryResults`, and `tabledata.list` return.
//
// Scalar formatting is the C++ engine's job: it renders every
// `googlesql::Value` into the per-TypeKind string documented at
// docs/bigquery/docs/reference/rest/v2/StandardSqlDataType.md (mirrored
// in docs/REST_API.md "Type wire encoding"). This function therefore only
// reshapes structure and never reformats a scalar:
//
//   - `INT64`            -> decimal string, e.g. "42"
//   - `BOOL`             -> "true" or "false"
//   - `FLOAT64`          -> decimal string, or "NaN" / "Infinity" / "-Infinity"
//   - `STRING`           -> raw string
//   - `BYTES`            -> base64 string (RFC 4648 section 4)
//   - `DATE`             -> "YYYY-MM-DD"
//   - `TIMESTAMP`        -> RFC 3339 with mandatory `Z`, microsecond precision
//   - `DATETIME`         -> "YYYY-MM-DD HH:MM:SS.ffffff"
//   - `TIME`             -> "HH:MM:SS.ffffff"
//   - `NUMERIC`          -> decimal string
//   - `BIGNUMERIC`       -> decimal string
//   - `GEOGRAPHY`        -> WKT string
//   - `JSON`             -> string-encoded JSON
//   - `ARRAY`            -> Cell whose `v` is a list of {"v": ...} entries
//   - `STRUCT`           -> Cell whose `v` is a Row-shaped {"f": [{"v": ...}, ...]}
//   - NULL               -> Cell whose `v` is nil (JSON null)
//
// A STRUCT becomes a nested positional `Row` rather than a JSON object so
// that it survives `tabledata.list`, which disallows duplicate field
// names. ARRAY elements are `Cell`s themselves, which keeps nested
// ARRAY-of-STRUCT, ARRAY-of-ARRAY, and NULL elements uniform.
//
// A nil cell, an explicit null, and any value kind not listed in the
// switch below all produce a NULL cell.
func ValueToCell(c *enginepb.Cell) Cell {
        if c == nil {
                return Cell{V: nil}
        }
        switch val := c.GetValue().(type) {
        case *enginepb.Cell_StringValue:
                return Cell{V: val.StringValue}
        case *enginepb.Cell_Array:
                src := val.Array.GetElements()
                converted := make([]Cell, len(src))
                for i := range src {
                        converted[i] = ValueToCell(src[i])
                }
                return Cell{V: converted}
        case *enginepb.Cell_StructValue:
                src := val.StructValue.GetFields()
                converted := make([]Cell, len(src))
                for i := range src {
                        converted[i] = ValueToCell(src[i])
                }
                return Cell{V: Row{F: converted}}
        case *enginepb.Cell_NullValue:
                return Cell{V: nil}
        default:
                return Cell{V: nil}
        }
}

// CellsToRow turns a flat slice of engine cells into the top-level `f`/`v`
// Row that BigQuery REST clients expect, converting each cell with
// ValueToCell. The result always has a non-nil F, even for no cells. A
// STRUCT column inside the row comes out as a Cell whose `v` is itself a
// Row (ValueToCell takes care of that).
func CellsToRow(cells []*enginepb.Cell) Row {
        converted := make([]Cell, len(cells))
        for i := range cells {
                converted[i] = ValueToCell(cells[i])
        }
        return Row{F: converted}
}

// WireFormatOptions controls optional REST wire-shape adjustments for
// tabledata.list and query results.
//
// UseInt64Timestamp: when true, encodeCellForField emits a TIMESTAMP cell
// as an int64 value of epoch microseconds instead of the decimal-string
// form it uses by default.
type WireFormatOptions struct {
        UseInt64Timestamp bool
}

// CellsToRowForSchema works like CellsToRow and additionally re-encodes
// TIMESTAMP values as decimal microsecond strings, because
// google-cloud-bigquery's query-result parser
// (`CELL_DATA_PARSER.timestamp_to_py`) expects microseconds since the Unix
// epoch rather than the human-readable strings the engine emits.
//
// Cells are matched to schema fields by position; a cell with no
// corresponding field (nil schema, or more cells than fields) is passed
// through as converted by ValueToCell. Only the first element of opts is
// consulted; without it the zero WireFormatOptions applies.
func CellsToRowForSchema(
        cells []*enginepb.Cell,
        schema *enginepb.TableSchema,
        opts ...WireFormatOptions,
) Row {
        format := WireFormatOptions{}
        if len(opts) != 0 {
                format = opts[0]
        }
        var columns []*enginepb.FieldSchema
        if schema != nil {
                columns = schema.GetFields()
        }
        encoded := make([]Cell, len(cells))
        for idx, raw := range cells {
                var column *enginepb.FieldSchema
                if idx < len(columns) {
                        column = columns[idx]
                }
                encoded[idx] = encodeCellForField(ValueToCell(raw), column, format)
        }
        return Row{F: encoded}
}

// encodeCellForField applies schema-driven re-encoding to one converted
// cell and returns the result:
//
//   - a NULL cell or a nil field is returned untouched;
//   - a field whose type starts with "ARRAY<" has every element re-encoded
//     against the element schema from arrayElementFieldSchema;
//   - TIMESTAMP strings become epoch microseconds (a decimal string, or an
//     int64 when format.UseInt64Timestamp is set and the digits parse); a
//     blank string becomes NULL, and a string that cannot be converted is
//     returned as is;
//   - STRUCT / RECORD rows have their members re-encoded positionally
//     against the field's sub-fields;
//   - every other type, and any cell whose dynamic value does not match
//     what its type implies, is returned untouched.
func encodeCellForField(cell Cell, field *enginepb.FieldSchema, format WireFormatOptions) Cell {
        if field == nil || cell.V == nil {
                return cell
        }
        kind := field.GetType()
        switch {
        case strings.HasPrefix(kind, "ARRAY<"):
                items, isList := cell.V.([]Cell)
                if !isList {
                        return cell
                }
                itemSchema := arrayElementFieldSchema(field)
                encoded := make([]Cell, 0, len(items))
                for _, item := range items {
                        encoded = append(encoded, encodeCellForField(item, itemSchema, format))
                }
                return Cell{V: encoded}
        case kind == "TIMESTAMP":
                text, isString := cell.V.(string)
                if !isString {
                        return cell
                }
                if strings.TrimSpace(text) == "" {
                        return Cell{V: nil}
                }
                micros, err := TimestampStringToMicros(text)
                if err != nil {
                        return cell
                }
                if format.UseInt64Timestamp {
                        // Fall back to the string form if the digits do not fit int64.
                        if n, parseErr := strconv.ParseInt(micros, 10, 64); parseErr == nil {
                                return Cell{V: n}
                        }
                }
                return Cell{V: micros}
        case kind == "STRUCT" || kind == "RECORD":
                record, isRow := cell.V.(Row)
                if !isRow {
                        return cell
                }
                members := field.GetFields()
                encoded := make([]Cell, 0, len(record.F))
                for pos, member := range record.F {
                        var memberSchema *enginepb.FieldSchema
                        if pos < len(members) {
                                memberSchema = members[pos]
                        }
                        encoded = append(encoded, encodeCellForField(member, memberSchema, format))
                }
                return Cell{V: Row{F: encoded}}
        }
        return cell
}

// arrayElementFieldSchema derives the schema of one array element from an
// "ARRAY<...>" field: the element type is the text between the leading
// "ARRAY<" and a single trailing ">". A field whose type is not an array
// is returned unchanged.
//
// NOTE(review): the synthesized element schema carries only Type; any
// sub-fields on the array field are not copied onto it -- confirm that
// arrays of structs do not need them here.
func arrayElementFieldSchema(field *enginepb.FieldSchema) *enginepb.FieldSchema {
        const arrayPrefix = "ARRAY<"
        typ := field.GetType()
        if strings.HasPrefix(typ, arrayPrefix) {
                element := strings.TrimSuffix(typ[len(arrayPrefix):], ">")
                return &enginepb.FieldSchema{Type: element}
        }
        return field
}

// TimestampStringToMicros parses an engine TIMESTAMP wire string and
// returns the BigQuery REST query-result encoding: decimal microseconds
// since 1970-01-01 UTC.
//
// Surrounding whitespace is ignored. A value that is already a decimal
// integer is treated as epoch microseconds and returned as is; this
// includes negative values, which denote instants before 1970. Anything
// else is handed to parseTimestampWireString. An empty string or an
// unparseable timestamp yields an error and an empty result.
func TimestampStringToMicros(s string) (string, error) {
        s = strings.TrimSpace(s)
        if s == "" {
                return "", errors.New("empty timestamp")
        }
        // Storage Read and some engine paths already emit epoch micros as
        // decimal digits. Strip one leading minus sign before the digit check
        // so pre-epoch values are recognized too; a lone "-" still fails it.
        if isDecimalIntString(strings.TrimPrefix(s, "-")) {
                return s, nil
        }
        t, err := parseTimestampWireString(s)
        if err != nil {
                return "", err
        }
        // UnixMicro is independent of the time's location, so no UTC
        // conversion is needed first.
        return strconv.FormatInt(t.UnixMicro(), 10), nil
}

// parseTimestampWireString parses a TIMESTAMP string in any of the shapes
// the engine is known to emit: a date and time separated by "T" or a
// space, optional fractional seconds, and an optional zone written as "Z",
// an hour offset ("+00", "-08"), or an hour:minute offset ("+00:00",
// "+05:30"). A string without a zone is interpreted as UTC.
//
// It returns the parsed instant, or the error from the last layout tried
// when no layout matches.
func parseTimestampWireString(s string) (time.Time, error) {
        // Normalize to the space-separated form with a short UTC offset so
        // the common cases hit the first layouts.
        s = strings.TrimSpace(s)
        s = strings.Replace(s, "T", " ", 1)
        s = strings.Replace(s, "+00:00", "+00", 1)
        s = strings.Replace(s, "Z", "+00", 1)
        layouts := [...]string{
                "2006-01-02 15:04:05.999999-07",
                "2006-01-02 15:04:05-07",
                // Offsets that carry minutes (e.g. "+05:30") do not match the
                // hour-only layouts above.
                "2006-01-02 15:04:05.999999-07:00",
                "2006-01-02 15:04:05-07:00",
                "2006-01-02 15:04:05.999999",
                "2006-01-02 15:04:05",
        }
        var lastErr error
        for _, layout := range layouts {
                t, err := time.Parse(layout, s)
                if err == nil {
                        return t, nil
                }
                lastErr = err
        }
        return time.Time{}, lastErr
}

// isDecimalIntString reports whether s is non-empty and made up solely of
// the ASCII digits 0-9. Signs, whitespace, and non-ASCII digits all make
// it return false.
func isDecimalIntString(s string) bool {
        for i := 0; i < len(s); i++ {
                if c := s[i]; c < '0' || c > '9' {
                        return false
                }
        }
        return len(s) > 0
}

package bqtypes

import (
        "bytes"
        "encoding/json"
        "fmt"
        "maps"
        "strconv"
)

// injectJSONStringField forces a top-level string field onto an encoded
// JSON object, replacing the field if it already exists.
//
// raw must encode a JSON object. Any other document is rejected with an
// error; that includes `null`, which decodes into a nil map and would
// otherwise make the assignment below panic. The result is re-encoded
// from a map, so keys come out in sorted order.
func injectJSONStringField(raw []byte, key, value string) ([]byte, error) {
        var doc map[string]json.RawMessage
        if err := json.Unmarshal(raw, &doc); err != nil {
                return nil, err
        }
        if doc == nil {
                return nil, fmt.Errorf("inject %q: encoded value is not a JSON object", key)
        }
        encoded, err := json.Marshal(value)
        if err != nil {
                return nil, err
        }
        doc[key] = encoded
        return json.Marshal(doc)
}

// marshalWithoutJSONField JSON-encodes v and returns the encoding with the
// top-level field dropField removed. v must encode to a JSON object (or
// null); the object is rebuilt from a map, so keys come out sorted. A
// dropField that is not present is simply ignored.
func marshalWithoutJSONField(v any, dropField string) ([]byte, error) {
        encoded, err := json.Marshal(v)
        if err != nil {
                return nil, err
        }
        var fields map[string]json.RawMessage
        if decodeErr := json.Unmarshal(encoded, &fields); decodeErr != nil {
                return nil, decodeErr
        }
        delete(fields, dropField)
        return json.Marshal(fields)
}

// ResourceLabels is the labels map carried by Dataset and Table resources.
//
// Decoding tolerates null values and treats them as deletion markers: the
// key is left out of the resulting map (the upstream Node
// `deleteLabelDataset` sample sends `{color: null}` via setMetadata).
// Encoding writes `{}` for a nil map, so client libraries that call
// `Object.entries(resource.labels)` never see a missing field.
type ResourceLabels map[string]string

// MarshalJSON implements json.Marshaler. A nil map encodes as `{}` rather
// than `null`.
func (l ResourceLabels) MarshalJSON() ([]byte, error) {
        if l != nil {
                return json.Marshal(map[string]string(l))
        }
        return []byte("{}"), nil
}

// UnmarshalJSON implements json.Unmarshaler. Entries whose value is JSON
// null are skipped; any other non-string value is an error naming the
// offending key. The receiver is replaced only on success and is then
// always a non-nil map.
func (l *ResourceLabels) UnmarshalJSON(data []byte) error {
        var entries map[string]json.RawMessage
        if err := json.Unmarshal(data, &entries); err != nil {
                return err
        }
        decoded := make(ResourceLabels, len(entries))
        for key, rawValue := range entries {
                if string(bytes.TrimSpace(rawValue)) == "null" {
                        continue
                }
                var value string
                if err := json.Unmarshal(rawValue, &value); err != nil {
                        return fmt.Errorf("labels[%q]: %w", key, err)
                }
                decoded[key] = value
        }
        *l = decoded
        return nil
}

// MillisTimestamp is a BigQuery REST int64 milliseconds-since-epoch field,
// which travels as a decimal string on the wire. Decoding additionally
// accepts a JSON number, because the Node client sometimes posts
// expirationTime as a number on tables.insert.
type MillisTimestamp string

// String returns the canonical decimal string form.
func (t MillisTimestamp) String() string {
        return string(t)
}

// UnmarshalJSON implements json.Unmarshaler. JSON null clears the value, a
// JSON string is stored verbatim, and a JSON number must be an integer
// that fits int64, which is stored in its decimal form. The receiver is
// left untouched when an error is returned.
func (t *MillisTimestamp) UnmarshalJSON(data []byte) error {
        trimmed := bytes.TrimSpace(data)
        switch {
        case string(trimmed) == "null":
                *t = ""
                return nil
        case len(trimmed) > 0 && trimmed[0] == '"':
                var text string
                if err := json.Unmarshal(trimmed, &text); err != nil {
                        return err
                }
                *t = MillisTimestamp(text)
                return nil
        }
        var num json.Number
        if err := json.Unmarshal(trimmed, &num); err != nil {
                return fmt.Errorf("millis timestamp: %w", err)
        }
        millis, err := num.Int64()
        if err != nil {
                return fmt.Errorf("millis timestamp: %w", err)
        }
        *t = MillisTimestamp(strconv.FormatInt(millis, 10))
        return nil
}

// labelsPatch is the decoded form of a labels field from a request body.
type labelsPatch struct {
        values  map[string]string // keys that carried a string value
        delete  []string          // keys whose value was JSON null
        present bool              // the body contained a labels field at all
}

// parseLabelsJSON decodes the raw labels field of a request body.
//
// A nil data means the field was absent and yields the zero labelsPatch.
// Otherwise the result has present set, values holding every key with a
// string value, and delete holding every key whose value was JSON null.
// The order of delete follows map iteration and is therefore unspecified.
//
// On any decode failure the zero labelsPatch is returned together with an
// error prefixed with "labels" (and the offending key when one is known),
// so callers never see a half-populated patch.
func parseLabelsJSON(data json.RawMessage) (labelsPatch, error) {
        if data == nil {
                return labelsPatch{}, nil
        }
        var raw map[string]json.RawMessage
        if err := json.Unmarshal(data, &raw); err != nil {
                return labelsPatch{}, fmt.Errorf("labels: %w", err)
        }
        patch := labelsPatch{
                present: true,
                values:  make(map[string]string, len(raw)),
        }
        for key, val := range raw {
                if bytes.Equal(bytes.TrimSpace(val), []byte("null")) {
                        patch.delete = append(patch.delete, key)
                        continue
                }
                var s string
                if err := json.Unmarshal(val, &s); err != nil {
                        return labelsPatch{}, fmt.Errorf("labels[%q]: %w", key, err)
                }
                patch.values[key] = s
        }
        return patch, nil
}

// ApplyLabelsPatch applies an explicit labels update to base and returns
// the resulting map.
//
// When present is false the request did not mention labels and base itself
// is returned. Otherwise a fresh map is built: it starts as a copy of
// base, loses every key in deleteKeys (the JSON-null deletions), and is
// then overlaid with values, so a key that appears in both deleteKeys and
// values ends up set. base is never modified.
func ApplyLabelsPatch(
        base map[string]string,
        present bool,
        values map[string]string,
        deleteKeys []string,
) map[string]string {
        if !present {
                return base
        }
        merged := make(map[string]string, len(base)+len(values))
        maps.Copy(merged, base)
        for i := range deleteKeys {
                delete(merged, deleteKeys[i])
        }
        maps.Copy(merged, values)
        return merged
}

// UnmarshalWriteDisposition decodes a writeDisposition that may arrive
// either as a JSON string or as a one-element string array (the upstream
// `relaxColumnQueryAppend` sample posts writeDisposition as
// ['WRITE_APPEND']).
//
// An empty payload or JSON null yields "" with no error. An array with any
// length other than one, or a value that is neither a string nor a string
// array, is reported as a "writeDisposition: ..." error.
func UnmarshalWriteDisposition(raw json.RawMessage) (string, error) {
        body := bytes.TrimSpace(raw)
        if len(raw) == 0 || bytes.Equal(body, []byte("null")) {
                return "", nil
        }

        if len(body) == 0 || body[0] != '[' {
                // Scalar form.
                var value string
                if err := json.Unmarshal(body, &value); err != nil {
                        return "", fmt.Errorf("writeDisposition: %w", err)
                }
                return value, nil
        }

        // Array form: exactly one element is tolerated.
        var items []string
        if err := json.Unmarshal(body, &items); err != nil {
                return "", fmt.Errorf("writeDisposition: %w", err)
        }
        if len(items) != 1 {
                return "", fmt.Errorf("writeDisposition: want single-element array, got %d elements", len(items))
        }
        return items[0], nil
}

// Package copy implements synchronous BigQuery COPY jobs.
package copy

import (
        "context"
        "errors"
        "fmt"
        "io"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
        "github.com/vantaboard/bigquery-emulator/gateway/snapshots"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

const (
        // Write dispositions this package distinguishes when deciding how the
        // destination table is created, replaced, or appended to.
        writeTruncate = "WRITE_TRUNCATE"
        writeEmpty    = "WRITE_EMPTY"
        writeAppend   = "WRITE_APPEND"
        // createNever is the one create disposition handled specially: with it
        // the destination table must already exist or the job fails NotFound.
        createNever   = "CREATE_NEVER"

        // OperationCopy is the default copy job operation (table-to-table copy).
        OperationCopy = "COPY"
        // OperationSnapshot creates a SNAPSHOT destination from a TABLE source.
        OperationSnapshot = "SNAPSHOT"
        // OperationRestore creates a TABLE destination from a SNAPSHOT source.
        OperationRestore = "RESTORE"
        // OperationClone is accepted but treated like COPY (clone billing is N/A).
        OperationClone = "CLONE"
)

// NormalizeOperationType canonicalises an operationType string: surrounding
// whitespace is dropped and the rest is upper-cased. An empty result or
// "OPERATION_TYPE_UNSPECIFIED" maps to OperationCopy; any other value is
// returned in its canonical form without being validated.
func NormalizeOperationType(op string) string {
        normalized := strings.ToUpper(strings.TrimSpace(op))
        if normalized == "" || normalized == "OPERATION_TYPE_UNSPECIFIED" {
                return OperationCopy
        }
        return normalized
}

// validateOperationType accepts exactly the four operation constants of this
// package (COPY, SNAPSHOT, RESTORE, CLONE). The comparison is exact, so
// callers are expected to pass an already-normalized value. Anything else
// yields an "unsupported operationType" error quoting the input.
func validateOperationType(op string) error {
        for _, known := range [...]string{OperationCopy, OperationSnapshot, OperationRestore, OperationClone} {
                if op == known {
                        return nil
                }
        }
        return fmt.Errorf("unsupported operationType %q", op)
}

// Result captures copy-job statistics.
type Result struct {
        // CopiedRows is the row count reported for the job: the number of rows
        // inserted on the catalog path, or the destination's total row count on
        // the SQL path.
        CopiedRows         int64
        // CopiedLogicalBytes is the byte figure reported for the job. The catalog
        // path fills it with the summed string lengths of the copied cells; the
        // SQL path currently stores the destination row count in it.
        CopiedLogicalBytes int64
}

// Execute performs a copy job synchronously and returns its statistics.
//
// The configuration is validated first: cfg must be non-nil, carry a
// supported operationType, a destination table ID, and at least one source.
// An empty destination project falls back to defaultProject. An empty
// writeDisposition defaults to WRITE_APPEND for multi-source jobs and to
// WRITE_EMPTY otherwise.
//
// Path selection, in order:
//  1. shouldUseSQLCopy reports true: the engine SQL path is used and its
//     result (or error) is final.
//  2. Any source carries a decorator: the catalog path is used.
//  3. A query client is available: the SQL path is attempted; if it fails
//     the error is dropped and the catalog path runs instead.
//  4. Otherwise the catalog path is used.
func Execute(ctx context.Context, catalog enginepb.CatalogClient, query enginepb.QueryClient,
        snapStore *snapshots.Store, cfg *jobs.JobConfigurationCopy, defaultProject string,
) (Result, error) {
        if cfg == nil {
                return Result{}, errors.New("copy configuration is required")
        }
        if err := validateOperationType(NormalizeOperationType(cfg.OperationType)); err != nil {
                return Result{}, err
        }
        dest := cfg.DestinationTable
        if dest == nil || dest.TableID == "" {
                return Result{}, errors.New("destinationTable.tableId is required")
        }
        sources := sourceRefs(cfg, defaultProject)
        if len(sources) == 0 {
                return Result{}, errors.New("sourceTable or sourceTables is required")
        }

        destProject, destDataset, destTable := dest.ProjectID, dest.DatasetID, dest.TableID
        if destProject == "" {
                destProject = defaultProject
        }

        wd := cfg.WriteDisposition
        switch {
        case wd != "":
                // The caller picked a disposition explicitly; keep it.
        case len(sources) > 1:
                wd = writeAppend
        default:
                wd = writeEmpty
        }
        cd := cfg.CreateDisposition

        if err := checkCreateDisposition(ctx, catalog, cd, destProject, destDataset, destTable); err != nil {
                return Result{}, err
        }

        if shouldUseSQLCopy(ctx, catalog, snapStore, query, sources) {
                return executeSQLCopy(ctx, catalog, query, sources, destProject, destDataset, destTable, wd, cd)
        }
        if !hasSnapshotSource(sources) && query != nil {
                // Best effort: a failed SQL attempt is not reported, the catalog
                // path below gets to try instead.
                result, err := executeSQLCopy(ctx, catalog, query, sources, destProject, destDataset, destTable, wd, cd)
                if err == nil {
                        return result, nil
                }
        }
        return executeCatalogCopy(ctx, catalog, snapStore, sources, destProject, destDataset, destTable, wd)
}

// shouldUseSQLCopy decides whether the engine SQL path (FOR SYSTEM_TIME AS
// OF) must handle the job. It reports true as soon as one decorated source
// cannot be resolved from the snapshot store but its base table exists in
// the catalog. Decorated sources the store can resolve are left to the
// catalog copy path. Without a query client the answer is always false.
func shouldUseSQLCopy(ctx context.Context, catalog enginepb.CatalogClient,
        snapStore *snapshots.Store, query enginepb.QueryClient,
        sources []bqtypes.TableReference,
) bool {
        if query == nil {
                return false
        }
        for _, src := range sources {
                base, epoch, decorated := snapshots.ParseDecorator(src.TableID)
                switch {
                case !decorated:
                        continue
                case snapStore != nil:
                        // A stored snapshot wins over the live table.
                        if _, err := snapStore.ResolveAtEpoch(src.ProjectID, src.DatasetID, base, epoch); err == nil {
                                continue
                        }
                }
                live := &enginepb.TableRef{
                        ProjectId: src.ProjectID,
                        DatasetId: src.DatasetID,
                        TableId:   base,
                }
                if tableExists(ctx, catalog, live) {
                        return true
                }
        }
        return false
}

// checkCreateDisposition enforces CREATE_NEVER: with that disposition the
// destination table must already exist, otherwise a gRPC NotFound status is
// returned. Every other disposition passes without a catalog lookup.
func checkCreateDisposition(ctx context.Context, catalog enginepb.CatalogClient,
        cd, projectID, datasetID, tableID string,
) error {
        if cd == createNever {
                target := &enginepb.TableRef{ProjectId: projectID, DatasetId: datasetID, TableId: tableID}
                if !tableExists(ctx, catalog, target) {
                        msg := fmt.Sprintf("Not found: Table %s:%s.%s", projectID, datasetID, tableID)
                        return status.Error(codes.NotFound, msg)
                }
        }
        return nil
}

// sourceRefs returns the job's source tables with empty project IDs filled
// in from defaultProject. A non-empty SourceTables list takes precedence
// over the single SourceTable. The returned references are copies, so the
// configuration is left untouched. nil is returned when neither is set.
func sourceRefs(cfg *jobs.JobConfigurationCopy, defaultProject string) []bqtypes.TableReference {
        switch {
        case len(cfg.SourceTables) > 0:
                refs := make([]bqtypes.TableReference, len(cfg.SourceTables))
                for i, src := range cfg.SourceTables {
                        if src.ProjectID == "" {
                                src.ProjectID = defaultProject
                        }
                        refs[i] = src
                }
                return refs
        case cfg.SourceTable != nil:
                single := *cfg.SourceTable
                if single.ProjectID == "" {
                        single.ProjectID = defaultProject
                }
                return []bqtypes.TableReference{single}
        default:
                return nil
        }
}

// hasSnapshotSource reports whether at least one reference carries a table
// ID that snapshots.ParseDecorator recognises as decorated.
func hasSnapshotSource(refs []bqtypes.TableReference) bool {
        for i := range refs {
                _, _, decorated := snapshots.ParseDecorator(refs[i].TableID)
                if decorated {
                        return true
                }
        }
        return false
}

// executeSQLCopy runs the copy as a single SQL statement on the engine.
//
// With CREATE_NEVER the destination must already exist (NotFound otherwise).
// The statement from buildCopySQL is executed with the destination project
// as the request project, the response stream is drained until io.EOF, and
// the result is then derived from the destination's total row count.
func executeSQLCopy(ctx context.Context, catalog enginepb.CatalogClient, query enginepb.QueryClient,
        sources []bqtypes.TableReference, destProject, destDataset, destTable, wd, cd string,
) (Result, error) {
        destRef := &enginepb.TableRef{ProjectId: destProject, DatasetId: destDataset, TableId: destTable}
        if cd == createNever && !tableExists(ctx, catalog, destRef) {
                return Result{}, status.Error(codes.NotFound,
                        fmt.Sprintf("Not found: Table %s:%s.%s", destProject, destDataset, destTable))
        }

        statement, err := buildCopySQL(sources, destDataset, destTable, wd)
        if err != nil {
                return Result{}, err
        }
        req := &enginepb.QueryRequest{ProjectId: destProject, Sql: statement}
        stream, err := query.ExecuteQuery(ctx, req)
        if err != nil {
                return Result{}, err
        }

        // The row payload is irrelevant here; consume messages until the
        // stream ends, then tell a clean end (io.EOF) from a real failure.
        for {
                if _, err = stream.Recv(); err != nil {
                        break
                }
        }
        if !errors.Is(err, io.EOF) {
                return Result{}, err
        }
        return countDestinationRows(ctx, catalog, destProject, destDataset, destTable)
}

// buildCopySQL renders the statement that copies sources into the
// destination table.
//
// Each source becomes "SELECT * FROM `dataset`.`table`"; a decorated source
// additionally gets "FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(epoch)". Multiple
// sources are combined with UNION ALL. Table names are dataset-qualified
// only; project IDs are not part of the generated SQL. The write disposition
// picks the statement shape:
//
//	WRITE_TRUNCATE -> CREATE OR REPLACE TABLE dest AS ...
//	WRITE_EMPTY    -> CREATE TABLE dest AS ...
//	WRITE_APPEND   -> INSERT INTO dest ...
//
// Any other disposition is rejected with an error.
func buildCopySQL(sources []bqtypes.TableReference, destDataset, destTable, wd string) (string, error) {
        parts := make([]string, len(sources))
        for i, src := range sources {
                base, epoch, decorated := snapshots.ParseDecorator(src.TableID)
                table := quoteIdent(src.DatasetID) + "." + quoteIdent(base)
                if decorated {
                        table += fmt.Sprintf(" FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)", epoch)
                }
                parts[i] = "SELECT * FROM " + table
        }
        body := strings.Join(parts, " UNION ALL ")
        target := quoteIdent(destDataset) + "." + quoteIdent(destTable)

        var head string
        switch wd {
        case writeTruncate:
                head = "CREATE OR REPLACE TABLE " + target + " AS "
        case writeEmpty:
                head = "CREATE TABLE " + target + " AS "
        case writeAppend:
                head = "INSERT INTO " + target + " "
        default:
                return "", fmt.Errorf("unsupported writeDisposition %q", wd)
        }
        return head + body, nil
}

// quoteIdent wraps id in backticks so it can be spliced into a GoogleSQL
// statement as a quoted identifier.
//
// GoogleSQL quoted identifiers use the same backslash escape sequences as
// string literals; a doubled backtick is NOT an escape and would terminate
// the identifier early. Backslashes are therefore escaped first (so they are
// not read as the start of an escape sequence) and backticks second.
func quoteIdent(id string) string {
        escaped := strings.ReplaceAll(id, `\`, `\\`)
        escaped = strings.ReplaceAll(escaped, "`", "\\`")
        return "`" + escaped + "`"
}

// executeCatalogCopy copies rows through the catalog API instead of SQL.
//
// Every source is read in full; the first source's schema becomes the
// destination schema and each later source must be compatible with it. The
// destination dataset is ensured, the write disposition is applied to the
// destination table, and all collected rows are inserted in one call.
// CopiedRows is the number of rows the applier reports as inserted and
// CopiedLogicalBytes is the summed size estimate of the source rows.
func executeCatalogCopy(ctx context.Context, catalog enginepb.CatalogClient, snapStore *snapshots.Store,
        sources []bqtypes.TableReference, destProject, destDataset, destTable, wd string,
) (Result, error) {
        var (
                schema    *enginepb.TableSchema
                rows      []*enginepb.DataRow
                byteTotal int64
        )
        for i := range sources {
                srcSchema, srcRows, err := readSource(ctx, catalog, snapStore, sources[i])
                if err != nil {
                        return Result{}, err
                }
                switch {
                case schema == nil:
                        schema = srcSchema
                case !schemasCompatible(schema, srcSchema):
                        return Result{}, errors.New("source tables must have identical schemas for multi-source copy")
                }
                byteTotal += estimateRowBytes(srcRows)
                rows = append(rows, srcRows...)
        }
        if schema == nil {
                return Result{}, errors.New("could not resolve source table schema")
        }

        if err := ensureDataset(ctx, catalog, destProject, destDataset); err != nil {
                return Result{}, err
        }
        if err := applyWriteDisposition(ctx, catalog, destProject, destDataset, destTable, schema, wd); err != nil {
                return Result{}, err
        }

        dest := seed.TableRef{ProjectID: destProject, DatasetID: destDataset, TableID: destTable}
        inserted, err := seed.NewCatalogApplier(catalog).InsertRows(ctx, dest, schema, protoRowsToMaps(schema, rows))
        if err != nil {
                return Result{}, err
        }
        return Result{CopiedRows: int64(inserted), CopiedLogicalBytes: byteTotal}, nil
}

// readSource loads the schema and all rows of one copy source.
//
// A decorated table ID is resolved from the snapshot store at the decorator's
// epoch. The store is optional elsewhere in this package (shouldUseSQLCopy
// guards against nil), so a missing store is reported as an error here
// instead of being dereferenced. An undecorated source is described and then
// paged through the catalog; a describe failure is wrapped with the fully
// qualified source name.
func readSource(ctx context.Context, catalog enginepb.CatalogClient, snapStore *snapshots.Store,
        ref bqtypes.TableReference,
) (*enginepb.TableSchema, []*enginepb.DataRow, error) {
        base, epoch, decorated := snapshots.ParseDecorator(ref.TableID)
        if decorated {
                if snapStore == nil {
                        return nil, nil, fmt.Errorf("source table %s.%s.%s: snapshot store is not configured",
                                ref.ProjectID, ref.DatasetID, ref.TableID)
                }
                entry, err := snapStore.ResolveAtEpoch(ref.ProjectID, ref.DatasetID, base, epoch)
                if err != nil {
                        return nil, nil, err
                }
                return entry.Schema, entry.Rows, nil
        }
        tableRef := &enginepb.TableRef{
                ProjectId: ref.ProjectID,
                DatasetId: ref.DatasetID,
                TableId:   base,
        }
        desc, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
        if err != nil {
                return nil, nil, fmt.Errorf("source table %s.%s.%s: %w",
                        ref.ProjectID, ref.DatasetID, base, err)
        }
        rows, err := listAllRows(ctx, catalog, tableRef)
        if err != nil {
                return nil, nil, err
        }
        return desc.GetSchema(), rows, nil
}

// listAllRows pages through a table with ListRows, requesting up to 10,000
// rows per call, and returns every row received. Paging stops on the first
// empty page or once the running offset reaches the total row count reported
// by the latest response. Any RPC error aborts the scan and discards the
// rows gathered so far.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef,
) ([]*enginepb.DataRow, error) {
        const pageSize = 10_000
        var (
                all    []*enginepb.DataRow
                offset int64
        )
        for {
                resp, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
                        Table:      ref,
                        StartIndex: offset,
                        MaxResults: pageSize,
                })
                if err != nil {
                        return nil, err
                }
                batch := resp.GetRows()
                if len(batch) == 0 {
                        return all, nil
                }
                all = append(all, batch...)
                if offset += int64(len(batch)); offset >= resp.GetTotalRows() {
                        return all, nil
                }
        }
}

// protoRowsToMaps turns positional proto rows into one map per row, keyed
// by the schema's field names. Cells are matched to fields by index; a field
// with no cell at its index is left out of that row's map, and cells beyond
// the last field are ignored.
func protoRowsToMaps(schema *enginepb.TableSchema, rows []*enginepb.DataRow) []map[string]any {
        fields := schema.GetFields()
        converted := make([]map[string]any, len(rows))
        for r, row := range rows {
                cells := row.GetCells()
                // Only indexes present on both sides can be paired up.
                paired := len(fields)
                if len(cells) < paired {
                        paired = len(cells)
                }
                record := make(map[string]any, len(fields))
                for i, field := range fields[:paired] {
                        record[field.GetName()] = cellToAny(cells[i])
                }
                converted[r] = record
        }
        return converted
}

// cellToAny yields the cell's string value, or nil when the cell is missing
// or flagged as NULL.
func cellToAny(c *enginepb.Cell) any {
        if c != nil && !c.GetNullValue() {
                return c.GetStringValue()
        }
        return nil
}

// estimateRowBytes approximates the payload size of rows as the summed byte
// length of every non-nil cell's string value.
func estimateRowBytes(rows []*enginepb.DataRow) int64 {
        var total int64
        for i := range rows {
                cells := rows[i].GetCells()
                for j := range cells {
                        if cell := cells[j]; cell != nil {
                                total += int64(len(cell.GetStringValue()))
                        }
                }
        }
        return total
}

// ensureDataset asks a catalog applier to ensure the destination dataset,
// passing "US" as the location argument, and returns only the applier's
// error.
func ensureDataset(ctx context.Context, catalog enginepb.CatalogClient, projectID, datasetID string) error {
        _, err := seed.NewCatalogApplier(catalog).EnsureDataset(ctx, projectID, datasetID, "US")
        return err
}

// applyWriteDisposition prepares the destination table for the rows that
// are about to be inserted.
//
//   - WRITE_TRUNCATE: an existing table is dropped, then the table is
//     registered with schema.
//   - WRITE_EMPTY: an existing table is an AlreadyExists error; otherwise the
//     table is registered.
//   - anything else (append): the table is registered only when it is
//     missing, and an AlreadyExists reply to that registration is tolerated.
func applyWriteDisposition(ctx context.Context, catalog enginepb.CatalogClient,
        projectID, datasetID, tableID string, schema *enginepb.TableSchema, wd string,
) error {
        ref := &enginepb.TableRef{ProjectId: projectID, DatasetId: datasetID, TableId: tableID}
        register := func() error {
                _, err := catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{Table: ref, Schema: schema})
                return err
        }

        exists := tableExists(ctx, catalog, ref)
        switch {
        case wd == writeTruncate:
                if exists {
                        if _, err := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: ref}); err != nil {
                                return fmt.Errorf("WRITE_TRUNCATE drop: %w", err)
                        }
                }
                return register()
        case wd == writeEmpty:
                if exists {
                        return status.Error(codes.AlreadyExists,
                                fmt.Sprintf("Already Exists: Table %s:%s.%s", projectID, datasetID, tableID))
                }
                return register()
        case exists:
                // Append into a table that is already there: nothing to set up.
                return nil
        }

        if err := register(); err != nil && status.Code(err) != codes.AlreadyExists {
                return err
        }
        return nil
}

// tableExists reports whether DescribeTable succeeds for ref. Every error,
// whatever its status code, is treated as "does not exist".
func tableExists(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef) bool {
        if _, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref}); err != nil {
                return false
        }
        return true
}

// schemasCompatible reports whether two table schemas describe the same
// columns in the same order. Two nil schemas are compatible; a nil and a
// non-nil schema are not.
//
// Columns are compared by name, type, and effective mode, and nested
// (STRUCT/RECORD) sub-fields are compared recursively so that differing
// record layouts are not merged into one destination table.
func schemasCompatible(a, b *enginepb.TableSchema) bool {
        if a == nil || b == nil {
                return a == b
        }
        return fieldsCompatible(a.GetFields(), b.GetFields())
}

// fieldsCompatible compares two field lists position by position, descending
// into nested fields.
func fieldsCompatible(af, bf []*enginepb.FieldSchema) bool {
        if len(af) != len(bf) {
                return false
        }
        for i := range af {
                if af[i].GetName() != bf[i].GetName() ||
                        af[i].GetType() != bf[i].GetType() ||
                        effectiveFieldMode(af[i].GetMode()) != effectiveFieldMode(bf[i].GetMode()) ||
                        !fieldsCompatible(af[i].GetFields(), bf[i].GetFields()) {
                        return false
                }
        }
        return true
}

// effectiveFieldMode maps the empty mode to NULLABLE, the documented default
// for FieldSchema.Mode, so "" and "NULLABLE" compare equal.
func effectiveFieldMode(mode string) string {
        if mode == "" {
                return "NULLABLE"
        }
        return mode
}

// countDestinationRows builds the Result for the SQL copy path from the
// destination table's total row count, obtained with a ListRows call whose
// StartIndex and MaxResults are both zero.
//
// NOTE(review): CopiedLogicalBytes is populated with the row count here, not
// with a byte total — confirm this placeholder is intended.
func countDestinationRows(ctx context.Context, catalog enginepb.CatalogClient,
        projectID, datasetID, tableID string,
) (Result, error) {
        dest := &enginepb.TableRef{ProjectId: projectID, DatasetId: datasetID, TableId: tableID}
        resp, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{Table: dest, StartIndex: 0, MaxResults: 0})
        if err != nil {
                return Result{}, err
        }
        total := resp.GetTotalRows()

        var stats Result
        stats.CopiedRows = total
        stats.CopiedLogicalBytes = total
        return stats, nil
}

// FormatStatistics converts a Result into jobs.CopyStatistics, rendering
// both counters as base-10 strings.
func FormatStatistics(r Result) *jobs.CopyStatistics {
        rowText := strconv.FormatInt(r.CopiedRows, 10)
        byteText := strconv.FormatInt(r.CopiedLogicalBytes, 10)
        return &jobs.CopyStatistics{CopiedRows: rowText, CopiedLogicalBytes: byteText}
}

// Package engine is the Go-side gRPC client for the BigQuery emulator's
// C++ engine.
//
// The gateway and engine are two separate processes that talk over a
// local loopback gRPC channel (see proto/emulator.proto and
// gateway/enginepb). This package wraps the dial / health-probe / close
// dance so the gateway lifecycle code in gateway.go and the per-request
// HTTP handlers in gateway/handlers can share one connection.
//
// Client mirrors the way cloud-spanner-emulator's gateway connects to
// emulator_main: a single insecure loopback channel, one shared
// connection per gateway process, health checked via grpc.health.v1
// before any business RPCs are dispatched.
package engine

import (
        "context"
        "errors"
        "fmt"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "google.golang.org/grpc"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/credentials/insecure"
        healthpb "google.golang.org/grpc/health/grpc_health_v1"
        "google.golang.org/grpc/status"
)

// Client is a thin facade around the *grpc.ClientConn that the gateway
// uses to talk to the C++ engine. It owns the connection so callers can
// share one channel across all handlers and only have to Close once at
// shutdown.
//
// Catalog and Query are the two business-logic clients defined in
// proto/emulator.proto; StorageRead and StorageWrite are the internal
// storage contracts the public bqstorage shim adapts. Health is the
// standard grpc.health.v1 probe the engine wires up via
// grpc::EnableDefaultHealthCheckService (see frontend/server/server.cc).
// SQLTools is the enginepb SqlTools client; like the others it is created
// in Dial on the same connection.
type Client struct {
        // conn is the shared channel every exported client below is bound to;
        // Close releases it.
        conn *grpc.ClientConn

        Catalog      enginepb.CatalogClient
        Query        enginepb.QueryClient
        SQLTools     enginepb.SqlToolsClient
        StorageRead  enginepb.StorageReadClient
        StorageWrite enginepb.StorageWriteClient
        Health       healthpb.HealthClient
}

// Dial creates a gRPC channel to the engine at address (typically a
// "host:port" on the loopback interface) and returns a Client bound to it.
// An empty address is rejected up front.
//
// Insecure transport credentials are used because the channel never leaves
// the local machine; the engine subprocess's listening port is a
// gateway-internal contract, not a public API.
//
// Dial does not wait for the engine to become ready. Call WaitForReady (or
// the gateway's own startup probe) before issuing business RPCs. The
// returned Client owns the connection and must be closed by the caller at
// shutdown.
func Dial(address string) (*Client, error) {
        if address == "" {
                return nil, errors.New("engine: empty engine address")
        }

        creds := grpc.WithTransportCredentials(insecure.NewCredentials())
        conn, err := grpc.NewClient(address, creds)
        if err != nil {
                return nil, fmt.Errorf("engine: dial %s: %w", address, err)
        }

        // Every service stub shares the one connection.
        client := &Client{conn: conn}
        client.Catalog = enginepb.NewCatalogClient(conn)
        client.Query = enginepb.NewQueryClient(conn)
        client.SQLTools = enginepb.NewSqlToolsClient(conn)
        client.StorageRead = enginepb.NewStorageReadClient(conn)
        client.StorageWrite = enginepb.NewStorageWriteClient(conn)
        client.Health = healthpb.NewHealthClient(conn)
        return client, nil
}

// Close releases the underlying gRPC channel. It is safe to call on a
// nil receiver (gateway constructed without an engine subprocess).
//
// Close is idempotent. *grpc.ClientConn reports a repeated Close with a
// Canceled status (grpc.ErrClientConnClosing) rather than nil, so that
// status is translated to nil here; any other error is returned unchanged.
func (c *Client) Close() error {
        if c == nil || c.conn == nil {
                return nil
        }
        err := c.conn.Close()
        if status.Code(err) == codes.Canceled {
                // The connection was already closed by an earlier call.
                return nil
        }
        return err
}

// healthRetryInterval is the gap between successive grpc.health.v1.Check
// probes inside WaitForReady. Tuned to keep the worst-case startup
// latency low (we expect the engine subprocess to bind its socket within
// a few hundred milliseconds) without burning CPU on tight retries.
// WaitForReady sleeps for this long after every probe that did not end the
// wait.
const healthRetryInterval = 100 * time.Millisecond

// WaitForReady polls grpc.health.v1.Health.Check on the empty service
// name until it reports SERVING. A SERVING response means the engine has
// finished BuildAndStart and called SetServingStatus("", true) (see
// frontend/server/server.cc), which is the moment business RPCs become
// safe to issue.
//
// The loop is bounded by ctx; callers typically wrap a context.Background
// with a 30s timeout (see gateway.waitForEngine). A reachable engine that
// is not SERVING yet, and errors classified as transient by
// isTransientHealthError, are retried every healthRetryInterval. Any other
// error (for example Unimplemented, returned by an engine without the
// health service registered) is surfaced immediately so we fail fast
// instead of waiting out the timeout.
//
// Returns nil on SERVING, a wrapped ctx.Err() on timeout/cancel, a wrapped
// status error for non-retriable conditions, or an error when called on a
// nil client.
func (c *Client) WaitForReady(ctx context.Context) error {
        if c == nil {
                return errors.New("engine: nil client")
        }
        probe := &healthpb.HealthCheckRequest{Service: ""}
        for {
                // The probe runs under the caller's context, so its deadline is
                // the only per-RPC timeout in play and the loop cannot outlive it.
                resp, err := c.Health.Check(ctx, probe)
                if err == nil {
                        if resp.GetStatus() == healthpb.HealthCheckResponse_SERVING {
                                return nil
                        }
                        // Reachable but not SERVING yet (NOT_SERVING /
                        // SERVICE_UNKNOWN / UNKNOWN): keep polling.
                } else if !isTransientHealthError(err) {
                        return fmt.Errorf("engine: health check: %w", err)
                }
                // Otherwise the engine is still starting up; pause and retry
                // unless the context ends first.
                select {
                case <-time.After(healthRetryInterval):
                case <-ctx.Done():
                        return fmt.Errorf("engine: wait for ready: %w", ctx.Err())
                }
        }
}

// isTransientHealthError reports whether err is worth retrying because the
// engine may simply not have finished booting. The retryable gRPC codes are
// Unavailable, DeadlineExceeded, Canceled, and ResourceExhausted. A nil
// error and every other code (Unimplemented, InvalidArgument, ...) report
// false, so real failures surface immediately.
func isTransientHealthError(err error) bool {
        if err == nil {
                return false
        }
        code := status.Code(err)
        return code == codes.Unavailable ||
                code == codes.DeadlineExceeded ||
                code == codes.Canceled ||
                code == codes.ResourceExhausted
}

// emulator.proto is the internal contract between the Go REST gateway and
// the C++ engine. It is intentionally minimal: the gateway owns the
// public-facing BigQuery REST shape, and only forwards the bits that
// actually need GoogleSQL to do their job.
//
// Code generation is wired up via buf (see ../buf.gen.yaml) for the Go
// side and via Bazel (see ./BUILD.bazel) for the C++ side. The Go
// stubs land in gateway/enginepb/ and are checked in so `go build`
// works without an extra codegen step; the C++ stubs are generated
// fresh into the Bazel output tree.

// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
//         protoc-gen-go v1.36.11
//         protoc        v7.35.0
// source: emulator.proto

package enginepb

import (
        protoreflect "google.golang.org/protobuf/reflect/protoreflect"
        protoimpl "google.golang.org/protobuf/runtime/protoimpl"
        reflect "reflect"
        sync "sync"
        unsafe "unsafe"
)

// Version guards emitted with the generated code. Both values are assigned
// to the blank identifier; they exist only for the checks described below.
const (
        // Verify that this generated code is sufficiently up-to-date.
        _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
        // Verify that runtime/protoimpl is sufficiently up-to-date.
        _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)

// DatasetRef is the generated message type identifying a dataset by project
// ID and dataset ID (proto fields 1 and 2).
type DatasetRef struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        ProjectId     string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        DatasetId     string                 `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero DatasetRef and re-attaches the message
// info for message type index 0.
func (x *DatasetRef) Reset() {
        *x = DatasetRef{}
        mi := &file_emulator_proto_msgTypes[0]
        ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        ms.StoreMessageInfo(mi)
}

// String returns the message's string form as produced by protoimpl.
func (x *DatasetRef) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DatasetRef) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is served through the
// message info directly.
func (x *DatasetRef) ProtoReflect() protoreflect.Message {
        mi := &file_emulator_proto_msgTypes[0]
        if x != nil {
                ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
                if ms.LoadMessageInfo() == nil {
                        ms.StoreMessageInfo(mi)
                }
                return ms
        }
        return mi.MessageOf(x)
}

// Deprecated: Use DatasetRef.ProtoReflect.Descriptor instead.
func (*DatasetRef) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{0}
}

// GetProjectId returns ProjectId, or "" when x is nil.
func (x *DatasetRef) GetProjectId() string {
        if x != nil {
                return x.ProjectId
        }
        return ""
}

// GetDatasetId returns DatasetId, or "" when x is nil.
func (x *DatasetRef) GetDatasetId() string {
        if x != nil {
                return x.DatasetId
        }
        return ""
}

// TableRef is the generated message type identifying a table by project,
// dataset, and table ID, with an optional table type (proto fields 1-4).
type TableRef struct {
        state     protoimpl.MessageState `protogen:"open.v1"`
        ProjectId string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        DatasetId string                 `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
        TableId   string                 `protobuf:"bytes,3,opt,name=table_id,json=tableId,proto3" json:"table_id,omitempty"`
        // BigQuery REST `type` when known (e.g. VIEW). Empty defaults to TABLE.
        TableType     string `protobuf:"bytes,4,opt,name=table_type,json=tableType,proto3" json:"table_type,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero TableRef and re-attaches the message info
// for message type index 1.
func (x *TableRef) Reset() {
        *x = TableRef{}
        mi := &file_emulator_proto_msgTypes[1]
        ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        ms.StoreMessageInfo(mi)
}

// String returns the message's string form as produced by protoimpl.
func (x *TableRef) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*TableRef) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is served through the
// message info directly.
func (x *TableRef) ProtoReflect() protoreflect.Message {
        mi := &file_emulator_proto_msgTypes[1]
        if x != nil {
                ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
                if ms.LoadMessageInfo() == nil {
                        ms.StoreMessageInfo(mi)
                }
                return ms
        }
        return mi.MessageOf(x)
}

// Deprecated: Use TableRef.ProtoReflect.Descriptor instead.
func (*TableRef) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{1}
}

// GetProjectId returns ProjectId, or "" when x is nil.
func (x *TableRef) GetProjectId() string {
        if x != nil {
                return x.ProjectId
        }
        return ""
}

// GetDatasetId returns DatasetId, or "" when x is nil.
func (x *TableRef) GetDatasetId() string {
        if x != nil {
                return x.DatasetId
        }
        return ""
}

// GetTableId returns TableId, or "" when x is nil.
func (x *TableRef) GetTableId() string {
        if x != nil {
                return x.TableId
        }
        return ""
}

// GetTableType returns TableType, or "" when x is nil.
func (x *TableRef) GetTableType() string {
        if x != nil {
                return x.TableType
        }
        return ""
}

// FieldSchema is the message struct for one schema field: its name, type,
// mode, description and nested child fields.
type FieldSchema struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        Name  string                 `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        // BigQuery type names: STRING, INT64, FLOAT64, BOOL, TIMESTAMP, DATE,
        // TIME, DATETIME, GEOGRAPHY, BYTES, NUMERIC, BIGNUMERIC, JSON, STRUCT,
        // ARRAY, ...
        Type string `protobuf:"bytes,2,opt,name=type,proto3" json:"type,omitempty"`
        // NULLABLE | REQUIRED | REPEATED. Empty defaults to NULLABLE.
        Mode          string         `protobuf:"bytes,3,opt,name=mode,proto3" json:"mode,omitempty"`
        Description   string         `protobuf:"bytes,4,opt,name=description,proto3" json:"description,omitempty"`
        Fields        []*FieldSchema `protobuf:"bytes,5,rep,name=fields,proto3" json:"fields,omitempty"` // for STRUCT/RECORD.
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero FieldSchema and stores the message info taken
// from slot 2 of file_emulator_proto_msgTypes into its message state.
func (x *FieldSchema) Reset() {
        *x = FieldSchema{}
        info := &file_emulator_proto_msgTypes[2]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *FieldSchema) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*FieldSchema) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *FieldSchema) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[2]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{2}.
//
// Deprecated: Use FieldSchema.ProtoReflect.Descriptor instead.
func (*FieldSchema) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{2}
}

// GetName returns x.Name, or "" when x is nil.
func (x *FieldSchema) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// GetType returns x.Type, or "" when x is nil.
func (x *FieldSchema) GetType() string {
        if x == nil {
                return ""
        }
        return x.Type
}

// GetMode returns x.Mode, or "" when x is nil.
func (x *FieldSchema) GetMode() string {
        if x == nil {
                return ""
        }
        return x.Mode
}

// GetDescription returns x.Description, or "" when x is nil.
func (x *FieldSchema) GetDescription() string {
        if x == nil {
                return ""
        }
        return x.Description
}

// GetFields returns x.Fields, or nil when x is nil.
func (x *FieldSchema) GetFields() []*FieldSchema {
        if x == nil {
                return nil
        }
        return x.Fields
}

// TableSchema is the message struct holding a list of FieldSchema entries.
type TableSchema struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Fields        []*FieldSchema         `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero TableSchema and stores the message info taken
// from slot 3 of file_emulator_proto_msgTypes into its message state.
func (x *TableSchema) Reset() {
        *x = TableSchema{}
        info := &file_emulator_proto_msgTypes[3]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *TableSchema) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*TableSchema) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *TableSchema) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[3]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{3}.
//
// Deprecated: Use TableSchema.ProtoReflect.Descriptor instead.
func (*TableSchema) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{3}
}

// GetFields returns x.Fields, or nil when x is nil.
func (x *TableSchema) GetFields() []*FieldSchema {
        if x == nil {
                return nil
        }
        return x.Fields
}

// RegisterDatasetRequest is the message struct carrying a DatasetRef and a
// location string.
type RegisterDatasetRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
        Location      string                 `protobuf:"bytes,2,opt,name=location,proto3" json:"location,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterDatasetRequest and stores the message
// info taken from slot 4 of file_emulator_proto_msgTypes into its message state.
func (x *RegisterDatasetRequest) Reset() {
        *x = RegisterDatasetRequest{}
        info := &file_emulator_proto_msgTypes[4]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *RegisterDatasetRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*RegisterDatasetRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *RegisterDatasetRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[4]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{4}.
//
// Deprecated: Use RegisterDatasetRequest.ProtoReflect.Descriptor instead.
func (*RegisterDatasetRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{4}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *RegisterDatasetRequest) GetDataset() *DatasetRef {
        if x == nil {
                return nil
        }
        return x.Dataset
}

// GetLocation returns x.Location, or "" when x is nil.
func (x *RegisterDatasetRequest) GetLocation() string {
        if x == nil {
                return ""
        }
        return x.Location
}

// RegisterDatasetResponse is a message struct with no declared payload fields.
type RegisterDatasetResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterDatasetResponse and stores the message
// info taken from slot 5 of file_emulator_proto_msgTypes into its message state.
func (x *RegisterDatasetResponse) Reset() {
        *x = RegisterDatasetResponse{}
        info := &file_emulator_proto_msgTypes[5]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *RegisterDatasetResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*RegisterDatasetResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *RegisterDatasetResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[5]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{5}.
//
// Deprecated: Use RegisterDatasetResponse.ProtoReflect.Descriptor instead.
func (*RegisterDatasetResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{5}
}

// DropDatasetRequest is the message struct carrying a DatasetRef, a
// delete-contents flag and a REST metadata JSON snapshot.
type DropDatasetRequest struct {
        state          protoimpl.MessageState `protogen:"open.v1"`
        Dataset        *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
        DeleteContents bool                   `protobuf:"varint,2,opt,name=delete_contents,json=deleteContents,proto3" json:"delete_contents,omitempty"`
        // JSON object snapshot of gateway REST-only dataset metadata (labels,
        // friendlyName, ...) captured at delete time for undelete round-trip.
        RestMetadataJson string `protobuf:"bytes,3,opt,name=rest_metadata_json,json=restMetadataJson,proto3" json:"rest_metadata_json,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites x with a zero DropDatasetRequest and stores the message
// info taken from slot 6 of file_emulator_proto_msgTypes into its message state.
func (x *DropDatasetRequest) Reset() {
        *x = DropDatasetRequest{}
        info := &file_emulator_proto_msgTypes[6]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *DropDatasetRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DropDatasetRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *DropDatasetRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[6]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{6}.
//
// Deprecated: Use DropDatasetRequest.ProtoReflect.Descriptor instead.
func (*DropDatasetRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{6}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *DropDatasetRequest) GetDataset() *DatasetRef {
        if x == nil {
                return nil
        }
        return x.Dataset
}

// GetDeleteContents returns x.DeleteContents, or false when x is nil.
func (x *DropDatasetRequest) GetDeleteContents() bool {
        if x == nil {
                return false
        }
        return x.DeleteContents
}

// GetRestMetadataJson returns x.RestMetadataJson, or "" when x is nil.
func (x *DropDatasetRequest) GetRestMetadataJson() string {
        if x == nil {
                return ""
        }
        return x.RestMetadataJson
}

// DropDatasetResponse is a message struct with no declared payload fields.
type DropDatasetResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DropDatasetResponse and stores the message
// info taken from slot 7 of file_emulator_proto_msgTypes into its message state.
func (x *DropDatasetResponse) Reset() {
        *x = DropDatasetResponse{}
        info := &file_emulator_proto_msgTypes[7]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *DropDatasetResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DropDatasetResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *DropDatasetResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[7]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{7}.
//
// Deprecated: Use DropDatasetResponse.ProtoReflect.Descriptor instead.
func (*DropDatasetResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{7}
}

// UndeleteDatasetRequest is the message struct carrying a single DatasetRef.
type UndeleteDatasetRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero UndeleteDatasetRequest and stores the message
// info taken from slot 8 of file_emulator_proto_msgTypes into its message state.
func (x *UndeleteDatasetRequest) Reset() {
        *x = UndeleteDatasetRequest{}
        info := &file_emulator_proto_msgTypes[8]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *UndeleteDatasetRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*UndeleteDatasetRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *UndeleteDatasetRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[8]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{8}.
//
// Deprecated: Use UndeleteDatasetRequest.ProtoReflect.Descriptor instead.
func (*UndeleteDatasetRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{8}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *UndeleteDatasetRequest) GetDataset() *DatasetRef {
        if x == nil {
                return nil
        }
        return x.Dataset
}

// UndeleteDatasetResponse is the message struct carrying the restored REST
// metadata JSON string.
type UndeleteDatasetResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Restored gateway REST-only metadata JSON object from the dataset
        // tombstone sidecar (`restMetadata` in `_dataset.meta.json`).
        RestMetadataJson string `protobuf:"bytes,1,opt,name=rest_metadata_json,json=restMetadataJson,proto3" json:"rest_metadata_json,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites x with a zero UndeleteDatasetResponse and stores the message
// info taken from slot 9 of file_emulator_proto_msgTypes into its message state.
func (x *UndeleteDatasetResponse) Reset() {
        *x = UndeleteDatasetResponse{}
        info := &file_emulator_proto_msgTypes[9]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *UndeleteDatasetResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*UndeleteDatasetResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *UndeleteDatasetResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[9]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{9}.
//
// Deprecated: Use UndeleteDatasetResponse.ProtoReflect.Descriptor instead.
func (*UndeleteDatasetResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{9}
}

// GetRestMetadataJson returns x.RestMetadataJson, or "" when x is nil.
func (x *UndeleteDatasetResponse) GetRestMetadataJson() string {
        if x == nil {
                return ""
        }
        return x.RestMetadataJson
}

// ListDatasetsRequest is the message struct carrying a project ID string.
type ListDatasetsRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        ProjectId     string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListDatasetsRequest and stores the message
// info taken from slot 10 of file_emulator_proto_msgTypes into its message state.
func (x *ListDatasetsRequest) Reset() {
        *x = ListDatasetsRequest{}
        info := &file_emulator_proto_msgTypes[10]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ListDatasetsRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ListDatasetsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *ListDatasetsRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[10]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{10}.
//
// Deprecated: Use ListDatasetsRequest.ProtoReflect.Descriptor instead.
func (*ListDatasetsRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{10}
}

// GetProjectId returns x.ProjectId, or "" when x is nil.
func (x *ListDatasetsRequest) GetProjectId() string {
        if x == nil {
                return ""
        }
        return x.ProjectId
}

// ListDatasetsResponse is the message struct carrying a list of DatasetRef
// entries.
type ListDatasetsResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Refs are returned in deterministic (lexicographic by dataset_id)
        // order so the gateway's listing is stable across calls. See
        // backend/storage/storage.h::Storage::ListDatasets.
        Datasets      []*DatasetRef `protobuf:"bytes,1,rep,name=datasets,proto3" json:"datasets,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListDatasetsResponse and stores the message
// info taken from slot 11 of file_emulator_proto_msgTypes into its message state.
func (x *ListDatasetsResponse) Reset() {
        *x = ListDatasetsResponse{}
        info := &file_emulator_proto_msgTypes[11]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ListDatasetsResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ListDatasetsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *ListDatasetsResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[11]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{11}.
//
// Deprecated: Use ListDatasetsResponse.ProtoReflect.Descriptor instead.
func (*ListDatasetsResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{11}
}

// GetDatasets returns x.Datasets, or nil when x is nil.
func (x *ListDatasetsResponse) GetDatasets() []*DatasetRef {
        if x == nil {
                return nil
        }
        return x.Datasets
}

// RegisterTableRequest is the message struct carrying a TableRef and a
// TableSchema.
type RegisterTableRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        Schema        *TableSchema           `protobuf:"bytes,2,opt,name=schema,proto3" json:"schema,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterTableRequest and stores the message
// info taken from slot 12 of file_emulator_proto_msgTypes into its message state.
func (x *RegisterTableRequest) Reset() {
        *x = RegisterTableRequest{}
        info := &file_emulator_proto_msgTypes[12]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *RegisterTableRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*RegisterTableRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *RegisterTableRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[12]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{12}.
//
// Deprecated: Use RegisterTableRequest.ProtoReflect.Descriptor instead.
func (*RegisterTableRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{12}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *RegisterTableRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// GetSchema returns x.Schema, or nil when x is nil.
func (x *RegisterTableRequest) GetSchema() *TableSchema {
        if x == nil {
                return nil
        }
        return x.Schema
}

// RegisterTableResponse is a message struct with no declared payload fields.
type RegisterTableResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero RegisterTableResponse and stores the message
// info taken from slot 13 of file_emulator_proto_msgTypes into its message state.
func (x *RegisterTableResponse) Reset() {
        *x = RegisterTableResponse{}
        info := &file_emulator_proto_msgTypes[13]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *RegisterTableResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*RegisterTableResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *RegisterTableResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[13]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{13}.
//
// Deprecated: Use RegisterTableResponse.ProtoReflect.Descriptor instead.
func (*RegisterTableResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{13}
}

// DropTableRequest is the message struct carrying a single TableRef.
type DropTableRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DropTableRequest and stores the message info
// taken from slot 14 of file_emulator_proto_msgTypes into its message state.
func (x *DropTableRequest) Reset() {
        *x = DropTableRequest{}
        info := &file_emulator_proto_msgTypes[14]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *DropTableRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DropTableRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *DropTableRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[14]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{14}.
//
// Deprecated: Use DropTableRequest.ProtoReflect.Descriptor instead.
func (*DropTableRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{14}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *DropTableRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// DropTableResponse is a message struct with no declared payload fields.
type DropTableResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DropTableResponse and stores the message info
// taken from slot 15 of file_emulator_proto_msgTypes into its message state.
func (x *DropTableResponse) Reset() {
        *x = DropTableResponse{}
        info := &file_emulator_proto_msgTypes[15]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *DropTableResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DropTableResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *DropTableResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[15]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{15}.
//
// Deprecated: Use DropTableResponse.ProtoReflect.Descriptor instead.
func (*DropTableResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{15}
}

// ListTablesRequest is the message struct carrying a single DatasetRef.
type ListTablesRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListTablesRequest and stores the message info
// taken from slot 16 of file_emulator_proto_msgTypes into its message state.
func (x *ListTablesRequest) Reset() {
        *x = ListTablesRequest{}
        info := &file_emulator_proto_msgTypes[16]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ListTablesRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ListTablesRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *ListTablesRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[16]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{16}.
//
// Deprecated: Use ListTablesRequest.ProtoReflect.Descriptor instead.
func (*ListTablesRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{16}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *ListTablesRequest) GetDataset() *DatasetRef {
        if x == nil {
                return nil
        }
        return x.Dataset
}

// ListTablesResponse is the message struct carrying a list of TableRef entries.
type ListTablesResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Refs are returned in deterministic (lexicographic by table_id)
        // order. See backend/storage/storage.h::Storage::ListTables.
        Tables        []*TableRef `protobuf:"bytes,1,rep,name=tables,proto3" json:"tables,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero ListTablesResponse and stores the message
// info taken from slot 17 of file_emulator_proto_msgTypes into its message state.
func (x *ListTablesResponse) Reset() {
        *x = ListTablesResponse{}
        info := &file_emulator_proto_msgTypes[17]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *ListTablesResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ListTablesResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *ListTablesResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[17]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{17}.
//
// Deprecated: Use ListTablesResponse.ProtoReflect.Descriptor instead.
func (*ListTablesResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{17}
}

// GetTables returns x.Tables, or nil when x is nil.
func (x *ListTablesResponse) GetTables() []*TableRef {
        if x == nil {
                return nil
        }
        return x.Tables
}

// DescribeTableRequest is the message struct carrying a single TableRef.
type DescribeTableRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DescribeTableRequest and stores the message
// info taken from slot 18 of file_emulator_proto_msgTypes into its message state.
func (x *DescribeTableRequest) Reset() {
        *x = DescribeTableRequest{}
        info := &file_emulator_proto_msgTypes[18]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *DescribeTableRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DescribeTableRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *DescribeTableRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[18]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{18}.
//
// Deprecated: Use DescribeTableRequest.ProtoReflect.Descriptor instead.
func (*DescribeTableRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{18}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *DescribeTableRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// DescribeTableResponse is the message struct carrying a TableSchema plus the
// table type, view query and legacy-SQL flag.
type DescribeTableResponse struct {
        state  protoimpl.MessageState `protogen:"open.v1"`
        Schema *TableSchema           `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"`
        // BigQuery REST `type` when the table is a logical view (empty for
        // physical tables). Populated by Catalog.DescribeTable when the
        // target is registered in the view registry rather than storage.
        TableType string `protobuf:"bytes,2,opt,name=table_type,json=tableType,proto3" json:"table_type,omitempty"`
        // View SQL (`view.query` on the REST Table resource).
        ViewQuery string `protobuf:"bytes,3,opt,name=view_query,json=viewQuery,proto3" json:"view_query,omitempty"`
        // Always false for GoogleSQL views; carried for REST parity.
        ViewUseLegacySql bool `protobuf:"varint,4,opt,name=view_use_legacy_sql,json=viewUseLegacySql,proto3" json:"view_use_legacy_sql,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites x with a zero DescribeTableResponse and stores the message
// info taken from slot 19 of file_emulator_proto_msgTypes into its message state.
func (x *DescribeTableResponse) Reset() {
        *x = DescribeTableResponse{}
        info := &file_emulator_proto_msgTypes[19]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *DescribeTableResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DescribeTableResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *DescribeTableResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[19]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{19}.
//
// Deprecated: Use DescribeTableResponse.ProtoReflect.Descriptor instead.
func (*DescribeTableResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{19}
}

// GetSchema returns x.Schema, or nil when x is nil.
func (x *DescribeTableResponse) GetSchema() *TableSchema {
        if x == nil {
                return nil
        }
        return x.Schema
}

// GetTableType returns x.TableType, or "" when x is nil.
func (x *DescribeTableResponse) GetTableType() string {
        if x == nil {
                return ""
        }
        return x.TableType
}

// GetViewQuery returns x.ViewQuery, or "" when x is nil.
func (x *DescribeTableResponse) GetViewQuery() string {
        if x == nil {
                return ""
        }
        return x.ViewQuery
}

// GetViewUseLegacySql returns x.ViewUseLegacySql, or false when x is nil.
func (x *DescribeTableResponse) GetViewUseLegacySql() bool {
        if x == nil {
                return false
        }
        return x.ViewUseLegacySql
}

// DataRow represents one row of stored data. Its cells are positional and
// line up with the columns of the target table's `TableSchema`. The `Cell`
// shape is the same one used by `Query`'s result rows, which keeps the wire
// types consistent across the two read paths.
type DataRow struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Cells         []*Cell                `protobuf:"bytes,1,rep,name=cells,proto3" json:"cells,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero DataRow and stores the message info taken
// from slot 20 of file_emulator_proto_msgTypes into its message state.
func (x *DataRow) Reset() {
        *x = DataRow{}
        info := &file_emulator_proto_msgTypes[20]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *DataRow) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*DataRow) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *DataRow) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[20]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{20}.
//
// Deprecated: Use DataRow.ProtoReflect.Descriptor instead.
func (*DataRow) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{20}
}

// GetCells returns x.Cells, or nil when x is nil.
func (x *DataRow) GetCells() []*Cell {
        if x == nil {
                return nil
        }
        return x.Cells
}

// InsertRowsRequest is the message struct carrying a TableRef and a list of
// DataRow entries.
type InsertRowsRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        Rows          []*DataRow             `protobuf:"bytes,2,rep,name=rows,proto3" json:"rows,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero InsertRowsRequest and stores the message info
// taken from slot 21 of file_emulator_proto_msgTypes into its message state.
func (x *InsertRowsRequest) Reset() {
        *x = InsertRowsRequest{}
        info := &file_emulator_proto_msgTypes[21]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *InsertRowsRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*InsertRowsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *InsertRowsRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[21]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{21}.
//
// Deprecated: Use InsertRowsRequest.ProtoReflect.Descriptor instead.
func (*InsertRowsRequest) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{21}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *InsertRowsRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// GetRows returns x.Rows, or nil when x is nil.
func (x *InsertRowsRequest) GetRows() []*DataRow {
        if x == nil {
                return nil
        }
        return x.Rows
}

// InsertRowsResponse is a message struct with no declared payload fields.
type InsertRowsResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero InsertRowsResponse and stores the message
// info taken from slot 22 of file_emulator_proto_msgTypes into its message state.
func (x *InsertRowsResponse) Reset() {
        *x = InsertRowsResponse{}
        info := &file_emulator_proto_msgTypes[22]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text form produced by protoimpl.X.MessageStringOf.
func (x *InsertRowsResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*InsertRowsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. A nil receiver is answered
// by the message info's MessageOf; otherwise the message state is returned,
// after storing the message info in it when none has been loaded yet.
func (x *InsertRowsResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[22]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP together
// with the index path []int{22}.
//
// Deprecated: Use InsertRowsResponse.ProtoReflect.Descriptor instead.
func (*InsertRowsResponse) Descriptor() ([]byte, []int) {
        return file_emulator_proto_rawDescGZIP(), []int{22}
}

// ListRowsRequest is a protobuf message holding a table reference (field 1),
// a start index (field 2) and a maximum result count (field 3).
type ListRowsRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        Table *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        // Row index of the first row to return (0-based). Rows before this
        // index are skipped.
        StartIndex int64 `protobuf:"varint,2,opt,name=start_index,json=startIndex,proto3" json:"start_index,omitempty"`
        // Maximum number of rows in the response. A non-positive value
        // returns every remaining row from `start_index`.
        MaxResults    int64 `protobuf:"varint,3,opt,name=max_results,json=maxResults,proto3" json:"max_results,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero ListRowsRequest and then stores the
// message info from slot 23 of file_emulator_proto_msgTypes in its state.
func (x *ListRowsRequest) Reset() {
        *x = ListRowsRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[23])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ListRowsRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*ListRowsRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowsRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[23]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{23}.
//
// Deprecated: Use ListRowsRequest.ProtoReflect.Descriptor instead.
func (*ListRowsRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{23}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *ListRowsRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// GetStartIndex returns x.StartIndex, or 0 when x is nil.
func (x *ListRowsRequest) GetStartIndex() int64 {
        if x == nil {
                return 0
        }
        return x.StartIndex
}

// GetMaxResults returns x.MaxResults, or 0 when x is nil.
func (x *ListRowsRequest) GetMaxResults() int64 {
        if x == nil {
                return 0
        }
        return x.MaxResults
}

// RoutineRef is a protobuf message made of three string identifiers:
// project_id (field 1), dataset_id (field 2) and routine_id (field 3).
type RoutineRef struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        ProjectId     string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        DatasetId     string                 `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
        RoutineId     string                 `protobuf:"bytes,3,opt,name=routine_id,json=routineId,proto3" json:"routine_id,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero RoutineRef and then stores the message
// info from slot 24 of file_emulator_proto_msgTypes in its state.
func (x *RoutineRef) Reset() {
        *x = RoutineRef{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[24])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *RoutineRef) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*RoutineRef) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *RoutineRef) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[24]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{24}.
//
// Deprecated: Use RoutineRef.ProtoReflect.Descriptor instead.
func (*RoutineRef) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{24}
}

// GetProjectId returns x.ProjectId, or "" when x is nil.
func (x *RoutineRef) GetProjectId() string {
        if x == nil {
                return ""
        }
        return x.ProjectId
}

// GetDatasetId returns x.DatasetId, or "" when x is nil.
func (x *RoutineRef) GetDatasetId() string {
        if x == nil {
                return ""
        }
        return x.DatasetId
}

// GetRoutineId returns x.RoutineId, or "" when x is nil.
func (x *RoutineRef) GetRoutineId() string {
        if x == nil {
                return ""
        }
        return x.RoutineId
}

// RoutineDescriptor is a protobuf message pairing a RoutineRef (field 1)
// with five string fields: routine_type, language, definition_body, ddl_sql
// and signature_json (fields 2 through 6).
type RoutineDescriptor struct {
        state   protoimpl.MessageState `protogen:"open.v1"`
        Routine *RoutineRef            `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
        // SCALAR_FUNCTION | AGGREGATE_FUNCTION | TABLE_VALUED_FUNCTION | PROCEDURE
        RoutineType    string `protobuf:"bytes,2,opt,name=routine_type,json=routineType,proto3" json:"routine_type,omitempty"`
        Language       string `protobuf:"bytes,3,opt,name=language,proto3" json:"language,omitempty"`
        DefinitionBody string `protobuf:"bytes,4,opt,name=definition_body,json=definitionBody,proto3" json:"definition_body,omitempty"`
        DdlSql         string `protobuf:"bytes,5,opt,name=ddl_sql,json=ddlSql,proto3" json:"ddl_sql,omitempty"`
        SignatureJson  string `protobuf:"bytes,6,opt,name=signature_json,json=signatureJson,proto3" json:"signature_json,omitempty"`
        unknownFields  protoimpl.UnknownFields
        sizeCache      protoimpl.SizeCache
}

// Reset overwrites x with the zero RoutineDescriptor and then stores the
// message info from slot 25 of file_emulator_proto_msgTypes in its state.
func (x *RoutineDescriptor) Reset() {
        *x = RoutineDescriptor{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[25])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *RoutineDescriptor) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*RoutineDescriptor) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *RoutineDescriptor) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[25]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{25}.
//
// Deprecated: Use RoutineDescriptor.ProtoReflect.Descriptor instead.
func (*RoutineDescriptor) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{25}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *RoutineDescriptor) GetRoutine() *RoutineRef {
        if x == nil {
                return nil
        }
        return x.Routine
}

// GetRoutineType returns x.RoutineType, or "" when x is nil.
func (x *RoutineDescriptor) GetRoutineType() string {
        if x == nil {
                return ""
        }
        return x.RoutineType
}

// GetLanguage returns x.Language, or "" when x is nil.
func (x *RoutineDescriptor) GetLanguage() string {
        if x == nil {
                return ""
        }
        return x.Language
}

// GetDefinitionBody returns x.DefinitionBody, or "" when x is nil.
func (x *RoutineDescriptor) GetDefinitionBody() string {
        if x == nil {
                return ""
        }
        return x.DefinitionBody
}

// GetDdlSql returns x.DdlSql, or "" when x is nil.
func (x *RoutineDescriptor) GetDdlSql() string {
        if x == nil {
                return ""
        }
        return x.DdlSql
}

// GetSignatureJson returns x.SignatureJson, or "" when x is nil.
func (x *RoutineDescriptor) GetSignatureJson() string {
        if x == nil {
                return ""
        }
        return x.SignatureJson
}

// ListRoutinesRequest is a protobuf message holding a single dataset
// reference (field 1).
type ListRoutinesRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Dataset       *DatasetRef            `protobuf:"bytes,1,opt,name=dataset,proto3" json:"dataset,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero ListRoutinesRequest and then stores the
// message info from slot 26 of file_emulator_proto_msgTypes in its state.
func (x *ListRoutinesRequest) Reset() {
        *x = ListRoutinesRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[26])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ListRoutinesRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*ListRoutinesRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRoutinesRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[26]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{26}.
//
// Deprecated: Use ListRoutinesRequest.ProtoReflect.Descriptor instead.
func (*ListRoutinesRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{26}
}

// GetDataset returns x.Dataset, or nil when x is nil.
func (x *ListRoutinesRequest) GetDataset() *DatasetRef {
        if x == nil {
                return nil
        }
        return x.Dataset
}

// ListRoutinesResponse is a protobuf message holding a repeated list of
// RoutineDescriptor values (field 1).
type ListRoutinesResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Routines      []*RoutineDescriptor   `protobuf:"bytes,1,rep,name=routines,proto3" json:"routines,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero ListRoutinesResponse and then stores the
// message info from slot 27 of file_emulator_proto_msgTypes in its state.
func (x *ListRoutinesResponse) Reset() {
        *x = ListRoutinesResponse{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[27])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ListRoutinesResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*ListRoutinesResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRoutinesResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[27]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{27}.
//
// Deprecated: Use ListRoutinesResponse.ProtoReflect.Descriptor instead.
func (*ListRoutinesResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{27}
}

// GetRoutines returns x.Routines, or nil when x is nil.
func (x *ListRoutinesResponse) GetRoutines() []*RoutineDescriptor {
        if x == nil {
                return nil
        }
        return x.Routines
}

// GetRoutineRequest is a protobuf message holding a single RoutineRef
// (field 1).
type GetRoutineRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Routine       *RoutineRef            `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero GetRoutineRequest and then stores the
// message info from slot 28 of file_emulator_proto_msgTypes in its state.
func (x *GetRoutineRequest) Reset() {
        *x = GetRoutineRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[28])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *GetRoutineRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*GetRoutineRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *GetRoutineRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[28]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{28}.
//
// Deprecated: Use GetRoutineRequest.ProtoReflect.Descriptor instead.
func (*GetRoutineRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{28}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *GetRoutineRequest) GetRoutine() *RoutineRef {
        if x == nil {
                return nil
        }
        return x.Routine
}

// GetRoutineResponse is a protobuf message holding a single
// RoutineDescriptor (field 1).
type GetRoutineResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Routine       *RoutineDescriptor     `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero GetRoutineResponse and then stores the
// message info from slot 29 of file_emulator_proto_msgTypes in its state.
func (x *GetRoutineResponse) Reset() {
        *x = GetRoutineResponse{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[29])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *GetRoutineResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*GetRoutineResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *GetRoutineResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[29]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{29}.
//
// Deprecated: Use GetRoutineResponse.ProtoReflect.Descriptor instead.
func (*GetRoutineResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{29}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *GetRoutineResponse) GetRoutine() *RoutineDescriptor {
        if x == nil {
                return nil
        }
        return x.Routine
}

// UpsertRoutineRequest is a protobuf message holding a single
// RoutineDescriptor (field 1).
type UpsertRoutineRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Routine       *RoutineDescriptor     `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero UpsertRoutineRequest and then stores the
// message info from slot 30 of file_emulator_proto_msgTypes in its state.
func (x *UpsertRoutineRequest) Reset() {
        *x = UpsertRoutineRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[30])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *UpsertRoutineRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*UpsertRoutineRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRoutineRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[30]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{30}.
//
// Deprecated: Use UpsertRoutineRequest.ProtoReflect.Descriptor instead.
func (*UpsertRoutineRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{30}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *UpsertRoutineRequest) GetRoutine() *RoutineDescriptor {
        if x == nil {
                return nil
        }
        return x.Routine
}

// UpsertRoutineResponse is a protobuf message with no payload fields; it
// only carries the protoimpl bookkeeping fields.
type UpsertRoutineResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero UpsertRoutineResponse and then stores the
// message info from slot 31 of file_emulator_proto_msgTypes in its state.
func (x *UpsertRoutineResponse) Reset() {
        *x = UpsertRoutineResponse{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[31])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *UpsertRoutineResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*UpsertRoutineResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRoutineResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[31]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{31}.
//
// Deprecated: Use UpsertRoutineResponse.ProtoReflect.Descriptor instead.
func (*UpsertRoutineResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{31}
}

// DeleteRoutineRequest is a protobuf message holding a single RoutineRef
// (field 1).
type DeleteRoutineRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Routine       *RoutineRef            `protobuf:"bytes,1,opt,name=routine,proto3" json:"routine,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero DeleteRoutineRequest and then stores the
// message info from slot 32 of file_emulator_proto_msgTypes in its state.
func (x *DeleteRoutineRequest) Reset() {
        *x = DeleteRoutineRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[32])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *DeleteRoutineRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*DeleteRoutineRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRoutineRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[32]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{32}.
//
// Deprecated: Use DeleteRoutineRequest.ProtoReflect.Descriptor instead.
func (*DeleteRoutineRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{32}
}

// GetRoutine returns x.Routine, or nil when x is nil.
func (x *DeleteRoutineRequest) GetRoutine() *RoutineRef {
        if x == nil {
                return nil
        }
        return x.Routine
}

// DeleteRoutineResponse is a protobuf message with no payload fields; it
// only carries the protoimpl bookkeeping fields.
type DeleteRoutineResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero DeleteRoutineResponse and then stores the
// message info from slot 33 of file_emulator_proto_msgTypes in its state.
func (x *DeleteRoutineResponse) Reset() {
        *x = DeleteRoutineResponse{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[33])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *DeleteRoutineResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*DeleteRoutineResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRoutineResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[33]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{33}.
//
// Deprecated: Use DeleteRoutineResponse.ProtoReflect.Descriptor instead.
func (*DeleteRoutineResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{33}
}

// RowAccessPolicy is a protobuf message holding a table reference (field 1),
// a policy ID (field 2), a filter predicate string (field 3), a repeated
// list of grantee strings (field 4) and two int64 timestamps,
// creation_time_ms and last_modified_time_ms (fields 5 and 6).
type RowAccessPolicy struct {
        state              protoimpl.MessageState `protogen:"open.v1"`
        Table              *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        PolicyId           string                 `protobuf:"bytes,2,opt,name=policy_id,json=policyId,proto3" json:"policy_id,omitempty"`
        FilterPredicate    string                 `protobuf:"bytes,3,opt,name=filter_predicate,json=filterPredicate,proto3" json:"filter_predicate,omitempty"`
        Grantees           []string               `protobuf:"bytes,4,rep,name=grantees,proto3" json:"grantees,omitempty"`
        CreationTimeMs     int64                  `protobuf:"varint,5,opt,name=creation_time_ms,json=creationTimeMs,proto3" json:"creation_time_ms,omitempty"`
        LastModifiedTimeMs int64                  `protobuf:"varint,6,opt,name=last_modified_time_ms,json=lastModifiedTimeMs,proto3" json:"last_modified_time_ms,omitempty"`
        unknownFields      protoimpl.UnknownFields
        sizeCache          protoimpl.SizeCache
}

// Reset overwrites x with the zero RowAccessPolicy and then stores the
// message info from slot 34 of file_emulator_proto_msgTypes in its state.
func (x *RowAccessPolicy) Reset() {
        *x = RowAccessPolicy{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[34])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *RowAccessPolicy) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*RowAccessPolicy) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *RowAccessPolicy) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[34]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{34}.
//
// Deprecated: Use RowAccessPolicy.ProtoReflect.Descriptor instead.
func (*RowAccessPolicy) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{34}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *RowAccessPolicy) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// GetPolicyId returns x.PolicyId, or "" when x is nil.
func (x *RowAccessPolicy) GetPolicyId() string {
        if x == nil {
                return ""
        }
        return x.PolicyId
}

// GetFilterPredicate returns x.FilterPredicate, or "" when x is nil.
func (x *RowAccessPolicy) GetFilterPredicate() string {
        if x == nil {
                return ""
        }
        return x.FilterPredicate
}

// GetGrantees returns x.Grantees, or nil when x is nil.
func (x *RowAccessPolicy) GetGrantees() []string {
        if x == nil {
                return nil
        }
        return x.Grantees
}

// GetCreationTimeMs returns x.CreationTimeMs, or 0 when x is nil.
func (x *RowAccessPolicy) GetCreationTimeMs() int64 {
        if x == nil {
                return 0
        }
        return x.CreationTimeMs
}

// GetLastModifiedTimeMs returns x.LastModifiedTimeMs, or 0 when x is nil.
func (x *RowAccessPolicy) GetLastModifiedTimeMs() int64 {
        if x == nil {
                return 0
        }
        return x.LastModifiedTimeMs
}

// UpsertRowAccessPolicyRequest is a protobuf message holding a single
// RowAccessPolicy (field 1).
type UpsertRowAccessPolicyRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Policy        *RowAccessPolicy       `protobuf:"bytes,1,opt,name=policy,proto3" json:"policy,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero UpsertRowAccessPolicyRequest and then
// stores the message info from slot 35 of file_emulator_proto_msgTypes in
// its state.
func (x *UpsertRowAccessPolicyRequest) Reset() {
        *x = UpsertRowAccessPolicyRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[35])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *UpsertRowAccessPolicyRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*UpsertRowAccessPolicyRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRowAccessPolicyRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[35]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{35}.
//
// Deprecated: Use UpsertRowAccessPolicyRequest.ProtoReflect.Descriptor instead.
func (*UpsertRowAccessPolicyRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{35}
}

// GetPolicy returns x.Policy, or nil when x is nil.
func (x *UpsertRowAccessPolicyRequest) GetPolicy() *RowAccessPolicy {
        if x == nil {
                return nil
        }
        return x.Policy
}

// UpsertRowAccessPolicyResponse is a protobuf message holding a single
// RowAccessPolicy (field 1).
type UpsertRowAccessPolicyResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Policy        *RowAccessPolicy       `protobuf:"bytes,1,opt,name=policy,proto3" json:"policy,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero UpsertRowAccessPolicyResponse and then
// stores the message info from slot 36 of file_emulator_proto_msgTypes in
// its state.
func (x *UpsertRowAccessPolicyResponse) Reset() {
        *x = UpsertRowAccessPolicyResponse{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[36])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *UpsertRowAccessPolicyResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*UpsertRowAccessPolicyResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *UpsertRowAccessPolicyResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[36]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{36}.
//
// Deprecated: Use UpsertRowAccessPolicyResponse.ProtoReflect.Descriptor instead.
func (*UpsertRowAccessPolicyResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{36}
}

// GetPolicy returns x.Policy, or nil when x is nil.
func (x *UpsertRowAccessPolicyResponse) GetPolicy() *RowAccessPolicy {
        if x == nil {
                return nil
        }
        return x.Policy
}

// DeleteRowAccessPolicyRequest is a protobuf message holding a table
// reference (field 1) and a policy ID string (field 2).
type DeleteRowAccessPolicyRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        PolicyId      string                 `protobuf:"bytes,2,opt,name=policy_id,json=policyId,proto3" json:"policy_id,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero DeleteRowAccessPolicyRequest and then
// stores the message info from slot 37 of file_emulator_proto_msgTypes in
// its state.
func (x *DeleteRowAccessPolicyRequest) Reset() {
        *x = DeleteRowAccessPolicyRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[37])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *DeleteRowAccessPolicyRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*DeleteRowAccessPolicyRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRowAccessPolicyRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[37]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{37}.
//
// Deprecated: Use DeleteRowAccessPolicyRequest.ProtoReflect.Descriptor instead.
func (*DeleteRowAccessPolicyRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{37}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *DeleteRowAccessPolicyRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// GetPolicyId returns x.PolicyId, or "" when x is nil.
func (x *DeleteRowAccessPolicyRequest) GetPolicyId() string {
        if x == nil {
                return ""
        }
        return x.PolicyId
}

// DeleteRowAccessPolicyResponse is a protobuf message with no payload
// fields; it only carries the protoimpl bookkeeping fields.
type DeleteRowAccessPolicyResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero DeleteRowAccessPolicyResponse and then
// stores the message info from slot 38 of file_emulator_proto_msgTypes in
// its state.
func (x *DeleteRowAccessPolicyResponse) Reset() {
        *x = DeleteRowAccessPolicyResponse{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[38])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *DeleteRowAccessPolicyResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*DeleteRowAccessPolicyResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DeleteRowAccessPolicyResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[38]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{38}.
//
// Deprecated: Use DeleteRowAccessPolicyResponse.ProtoReflect.Descriptor instead.
func (*DeleteRowAccessPolicyResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{38}
}

// ListRowAccessPoliciesRequest is a protobuf message holding a single table
// reference (field 1).
type ListRowAccessPoliciesRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero ListRowAccessPoliciesRequest and then
// stores the message info from slot 39 of file_emulator_proto_msgTypes in
// its state.
func (x *ListRowAccessPoliciesRequest) Reset() {
        *x = ListRowAccessPoliciesRequest{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[39])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ListRowAccessPoliciesRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*ListRowAccessPoliciesRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowAccessPoliciesRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[39]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{39}.
//
// Deprecated: Use ListRowAccessPoliciesRequest.ProtoReflect.Descriptor instead.
func (*ListRowAccessPoliciesRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{39}
}

// GetTable returns x.Table, or nil when x is nil.
func (x *ListRowAccessPoliciesRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// ListRowAccessPoliciesResponse is a protobuf message holding a repeated
// list of RowAccessPolicy values (field 1).
type ListRowAccessPoliciesResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Policies      []*RowAccessPolicy     `protobuf:"bytes,1,rep,name=policies,proto3" json:"policies,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero ListRowAccessPoliciesResponse and then
// stores the message info from slot 40 of file_emulator_proto_msgTypes in
// its state.
func (x *ListRowAccessPoliciesResponse) Reset() {
        *x = ListRowAccessPoliciesResponse{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[40])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ListRowAccessPoliciesResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*ListRowAccessPoliciesResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowAccessPoliciesResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[40]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{40}.
//
// Deprecated: Use ListRowAccessPoliciesResponse.ProtoReflect.Descriptor instead.
func (*ListRowAccessPoliciesResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{40}
}

// GetPolicies returns x.Policies, or nil when x is nil.
func (x *ListRowAccessPoliciesResponse) GetPolicies() []*RowAccessPolicy {
        if x == nil {
                return nil
        }
        return x.Policies
}

// ColumnGovernance is a protobuf message holding a column name (field 1),
// a repeated list of policy tag strings (field 2), a mask kind string
// (field 3), a repeated list of mask grantee strings (field 4) and a default
// mask value string (field 5).
type ColumnGovernance struct {
        state      protoimpl.MessageState `protogen:"open.v1"`
        ColumnName string                 `protobuf:"bytes,1,opt,name=column_name,json=columnName,proto3" json:"column_name,omitempty"`
        PolicyTags []string               `protobuf:"bytes,2,rep,name=policy_tags,json=policyTags,proto3" json:"policy_tags,omitempty"`
        // NULLIFY | SHA256 | DEFAULT_VALUE | DENIED | NONE
        MaskKind         string   `protobuf:"bytes,3,opt,name=mask_kind,json=maskKind,proto3" json:"mask_kind,omitempty"`
        MaskGrantees     []string `protobuf:"bytes,4,rep,name=mask_grantees,json=maskGrantees,proto3" json:"mask_grantees,omitempty"`
        DefaultMaskValue string   `protobuf:"bytes,5,opt,name=default_mask_value,json=defaultMaskValue,proto3" json:"default_mask_value,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites x with the zero ColumnGovernance and then stores the
// message info from slot 41 of file_emulator_proto_msgTypes in its state.
func (x *ColumnGovernance) Reset() {
        *x = ColumnGovernance{}
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        state.StoreMessageInfo(&file_emulator_proto_msgTypes[41])
}

// String returns the result of protoimpl.X.MessageStringOf for x.
func (x *ColumnGovernance) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage has an empty body and performs no work.
func (*ColumnGovernance) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message for x. A nil receiver is
// delegated to the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ColumnGovernance) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[41]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the output of file_emulator_proto_rawDescGZIP together
// with the index path []int{41}.
//
// Deprecated: Use ColumnGovernance.ProtoReflect.Descriptor instead.
func (*ColumnGovernance) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{41}
}

// GetColumnName returns x.ColumnName, or "" when x is nil.
func (x *ColumnGovernance) GetColumnName() string {
        if x == nil {
                return ""
        }
        return x.ColumnName
}

// GetPolicyTags returns x.PolicyTags, or nil when x is nil.
func (x *ColumnGovernance) GetPolicyTags() []string {
        if x == nil {
                return nil
        }
        return x.PolicyTags
}

// GetMaskKind returns x.MaskKind, or "" when x is nil.
func (x *ColumnGovernance) GetMaskKind() string {
        if x == nil {
                return ""
        }
        return x.MaskKind
}

// GetMaskGrantees returns x.MaskGrantees, or nil when x is nil.
func (x *ColumnGovernance) GetMaskGrantees() []string {
        if x == nil {
                return nil
        }
        return x.MaskGrantees
}

// GetDefaultMaskValue returns x.DefaultMaskValue, or "" when x is nil.
func (x *ColumnGovernance) GetDefaultMaskValue() string {
        if x == nil {
                return ""
        }
        return x.DefaultMaskValue
}

// SetColumnGovernanceRequest is a protobuf message carrying a TableRef
// (Table) and a ColumnGovernance (Column).
type SetColumnGovernanceRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Table         *TableRef              `protobuf:"bytes,1,opt,name=table,proto3" json:"table,omitempty"`
        Column        *ColumnGovernance      `protobuf:"bytes,2,opt,name=column,proto3" json:"column,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero SetColumnGovernanceRequest and then
// records the message info for file_emulator_proto_msgTypes[42] in its
// message state.
func (x *SetColumnGovernanceRequest) Reset() {
        *x = SetColumnGovernanceRequest{}
        info := &file_emulator_proto_msgTypes[42]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SetColumnGovernanceRequest) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*SetColumnGovernanceRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *SetColumnGovernanceRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[42]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{42}.
//
// Deprecated: Use SetColumnGovernanceRequest.ProtoReflect.Descriptor instead.
func (*SetColumnGovernanceRequest) Descriptor() ([]byte, []int) {
        path := []int{42}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetTable returns x.Table, or nil when the receiver is nil.
func (x *SetColumnGovernanceRequest) GetTable() *TableRef {
        if x == nil {
                return nil
        }
        return x.Table
}

// GetColumn returns x.Column, or nil when the receiver is nil.
func (x *SetColumnGovernanceRequest) GetColumn() *ColumnGovernance {
        if x == nil {
                return nil
        }
        return x.Column
}

// SetColumnGovernanceResponse is a protobuf message with no fields of its
// own; it holds only the protoimpl bookkeeping fields.
type SetColumnGovernanceResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero SetColumnGovernanceResponse and then
// records the message info for file_emulator_proto_msgTypes[43] in its
// message state.
func (x *SetColumnGovernanceResponse) Reset() {
        *x = SetColumnGovernanceResponse{}
        info := &file_emulator_proto_msgTypes[43]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SetColumnGovernanceResponse) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*SetColumnGovernanceResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *SetColumnGovernanceResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[43]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{43}.
//
// Deprecated: Use SetColumnGovernanceResponse.ProtoReflect.Descriptor instead.
func (*SetColumnGovernanceResponse) Descriptor() ([]byte, []int) {
        path := []int{43}
        return file_emulator_proto_rawDescGZIP(), path
}

// ListRowsResponse is a protobuf message carrying a slice of DataRow values
// plus the TotalRows and NextStartIndex counters described on the fields
// below.
type ListRowsResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        Rows  []*DataRow             `protobuf:"bytes,1,rep,name=rows,proto3" json:"rows,omitempty"`
        // Total number of rows in the table at the moment the request was
        // served; the gateway surfaces this as `totalRows` in the REST
        // response.
        TotalRows int64 `protobuf:"varint,2,opt,name=total_rows,json=totalRows,proto3" json:"total_rows,omitempty"`
        // One past the index of the last row returned. The gateway uses
        // this to emit a `pageToken` (when `next_start_index < total_rows`)
        // or omit it (when the page reached the end of the table).
        NextStartIndex int64 `protobuf:"varint,3,opt,name=next_start_index,json=nextStartIndex,proto3" json:"next_start_index,omitempty"`
        unknownFields  protoimpl.UnknownFields
        sizeCache      protoimpl.SizeCache
}

// Reset overwrites *x with the zero ListRowsResponse and then records the
// message info for file_emulator_proto_msgTypes[44] in its message state.
func (x *ListRowsResponse) Reset() {
        *x = ListRowsResponse{}
        info := &file_emulator_proto_msgTypes[44]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ListRowsResponse) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*ListRowsResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *ListRowsResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[44]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{44}.
//
// Deprecated: Use ListRowsResponse.ProtoReflect.Descriptor instead.
func (*ListRowsResponse) Descriptor() ([]byte, []int) {
        path := []int{44}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetRows returns x.Rows, or nil when the receiver is nil.
func (x *ListRowsResponse) GetRows() []*DataRow {
        if x == nil {
                return nil
        }
        return x.Rows
}

// GetTotalRows returns x.TotalRows, or 0 when the receiver is nil.
func (x *ListRowsResponse) GetTotalRows() int64 {
        if x == nil {
                return 0
        }
        return x.TotalRows
}

// GetNextStartIndex returns x.NextStartIndex, or 0 when the receiver is nil.
func (x *ListRowsResponse) GetNextStartIndex() int64 {
        if x == nil {
                return 0
        }
        return x.NextStartIndex
}

// QueryRequest is a protobuf message carrying SQL text together with its
// project ID, default dataset ID, parameters, legacy-SQL flag, and principal
// email, as declared by the fields below.
type QueryRequest struct {
        state     protoimpl.MessageState `protogen:"open.v1"`
        ProjectId string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        // Default dataset for unqualified table references.
        DefaultDatasetId string `protobuf:"bytes,2,opt,name=default_dataset_id,json=defaultDatasetId,proto3" json:"default_dataset_id,omitempty"`
        Sql              string `protobuf:"bytes,3,opt,name=sql,proto3" json:"sql,omitempty"`
        // Optional: query parameters keyed by name. Positional parameters use
        // an empty key (BigQuery's @0, @1, ... convention is unrolled here).
        Parameters   map[string]*QueryParameter `protobuf:"bytes,4,rep,name=parameters,proto3" json:"parameters,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
        UseLegacySql bool                       `protobuf:"varint,5,opt,name=use_legacy_sql,json=useLegacySql,proto3" json:"use_legacy_sql,omitempty"`
        // Synthetic caller email from the gateway auth middleware. Empty
        // defaults to emulator@bigquery.local at the engine boundary.
        PrincipalEmail string `protobuf:"bytes,6,opt,name=principal_email,json=principalEmail,proto3" json:"principal_email,omitempty"`
        unknownFields  protoimpl.UnknownFields
        sizeCache      protoimpl.SizeCache
}

// Reset overwrites *x with the zero QueryRequest and then records the
// message info for file_emulator_proto_msgTypes[45] in its message state.
func (x *QueryRequest) Reset() {
        *x = QueryRequest{}
        info := &file_emulator_proto_msgTypes[45]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *QueryRequest) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*QueryRequest) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *QueryRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[45]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{45}.
//
// Deprecated: Use QueryRequest.ProtoReflect.Descriptor instead.
func (*QueryRequest) Descriptor() ([]byte, []int) {
        path := []int{45}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetProjectId returns x.ProjectId, or "" when the receiver is nil.
func (x *QueryRequest) GetProjectId() string {
        if x == nil {
                return ""
        }
        return x.ProjectId
}

// GetDefaultDatasetId returns x.DefaultDatasetId, or "" when the receiver
// is nil.
func (x *QueryRequest) GetDefaultDatasetId() string {
        if x == nil {
                return ""
        }
        return x.DefaultDatasetId
}

// GetSql returns x.Sql, or "" when the receiver is nil.
func (x *QueryRequest) GetSql() string {
        if x == nil {
                return ""
        }
        return x.Sql
}

// GetParameters returns x.Parameters, or nil when the receiver is nil.
func (x *QueryRequest) GetParameters() map[string]*QueryParameter {
        if x == nil {
                return nil
        }
        return x.Parameters
}

// GetUseLegacySql returns x.UseLegacySql, or false when the receiver is nil.
func (x *QueryRequest) GetUseLegacySql() bool {
        if x == nil {
                return false
        }
        return x.UseLegacySql
}

// GetPrincipalEmail returns x.PrincipalEmail, or "" when the receiver is nil.
func (x *QueryRequest) GetPrincipalEmail() string {
        if x == nil {
                return ""
        }
        return x.PrincipalEmail
}

// QueryParameter is a protobuf message describing one query parameter: a
// type kind name, a JSON-encoded value, and an optional JSON type
// descriptor, as documented on the fields below.
type QueryParameter struct {
        state     protoimpl.MessageState `protogen:"open.v1"`
        TypeKind  string                 `protobuf:"bytes,1,opt,name=type_kind,json=typeKind,proto3" json:"type_kind,omitempty"`    // googlesql TypeKind name (e.g. INT64, STRING).
        ValueJson string                 `protobuf:"bytes,2,opt,name=value_json,json=valueJson,proto3" json:"value_json,omitempty"` // JSON-encoded literal value.
        // Optional STRUCT/ARRAY field-type descriptor (gateway-encoded from
        // REST `parameterType`; empty for scalar parameters).
        TypeJson      string `protobuf:"bytes,3,opt,name=type_json,json=typeJson,proto3" json:"type_json,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero QueryParameter and then records the
// message info for file_emulator_proto_msgTypes[46] in its message state.
func (x *QueryParameter) Reset() {
        *x = QueryParameter{}
        info := &file_emulator_proto_msgTypes[46]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *QueryParameter) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*QueryParameter) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *QueryParameter) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[46]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{46}.
//
// Deprecated: Use QueryParameter.ProtoReflect.Descriptor instead.
func (*QueryParameter) Descriptor() ([]byte, []int) {
        path := []int{46}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetTypeKind returns x.TypeKind, or "" when the receiver is nil.
func (x *QueryParameter) GetTypeKind() string {
        if x == nil {
                return ""
        }
        return x.TypeKind
}

// GetValueJson returns x.ValueJson, or "" when the receiver is nil.
func (x *QueryParameter) GetValueJson() string {
        if x == nil {
                return ""
        }
        return x.ValueJson
}

// GetTypeJson returns x.TypeJson, or "" when the receiver is nil.
func (x *QueryParameter) GetTypeJson() string {
        if x == nil {
                return ""
        }
        return x.TypeJson
}

// DryRunResponse is a protobuf message carrying a TableSchema (Schema) and
// an EstimatedBytesProcessed count.
type DryRunResponse struct {
        state                   protoimpl.MessageState `protogen:"open.v1"`
        Schema                  *TableSchema           `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"`
        EstimatedBytesProcessed int64                  `protobuf:"varint,2,opt,name=estimated_bytes_processed,json=estimatedBytesProcessed,proto3" json:"estimated_bytes_processed,omitempty"`
        unknownFields           protoimpl.UnknownFields
        sizeCache               protoimpl.SizeCache
}

// Reset overwrites *x with the zero DryRunResponse and then records the
// message info for file_emulator_proto_msgTypes[47] in its message state.
func (x *DryRunResponse) Reset() {
        *x = DryRunResponse{}
        info := &file_emulator_proto_msgTypes[47]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *DryRunResponse) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*DryRunResponse) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DryRunResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[47]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{47}.
//
// Deprecated: Use DryRunResponse.ProtoReflect.Descriptor instead.
func (*DryRunResponse) Descriptor() ([]byte, []int) {
        path := []int{47}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetSchema returns x.Schema, or nil when the receiver is nil.
func (x *DryRunResponse) GetSchema() *TableSchema {
        if x == nil {
                return nil
        }
        return x.Schema
}

// GetEstimatedBytesProcessed returns x.EstimatedBytesProcessed, or 0 when
// the receiver is nil.
func (x *DryRunResponse) GetEstimatedBytesProcessed() int64 {
        if x == nil {
                return 0
        }
        return x.EstimatedBytesProcessed
}

// QueryResultRow is a protobuf message used as one element of a query
// result stream; the comment inside the struct lists the message kinds it
// can represent.
type QueryResultRow struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // The stream carries one of six message kinds:
        //   - `schema` only — emitted as the first message of every
        //     `ExecuteQuery` reply for SELECT-shaped queries.
        //   - `cells` only — emitted once per result row.
        //   - `dml_stats` only — emitted as the final message of an
        //     INSERT / UPDATE / DELETE / MERGE reply, after any optional
        //     THEN-RETURN rows. Carries the per-statement modification
        //     counts the gateway folds into BigQuery's REST `dmlStats`
        //     and `numDmlAffectedRows` fields.
        //   - `statement_type` only — emitted as the trailing message of
        //     every successful reply (SELECT, DML, and DDL alike). The
        //     value is one of the canonical BigQuery REST statement-type
        //     strings (`SELECT`, `INSERT`, `CREATE_TABLE`, ...) the
        //     gateway folds into the
        //     `QueryResponse.statistics.query.statementType` envelope so
        //     callers can tell DDL / metadata / catalog operations apart
        //     from SELECTs and DML at the response layer. Routes that
        //     produce no recognizable BigQuery shape (today: nothing in
        //     the supported surface) leave the field empty so the gateway
        //     omits the envelope entirely.
        //   - `phase_timings` only — emitted after the last data row (or
        //     dml_stats) and before the `statement_type` trailer. Carries
        //     per-phase wall times in microseconds for emulator-internal
        //     performance debugging; the gateway surfaces this on loopback
        //     callers only as `Job.statistics.query.emulatorPhases`.
        //   - `emulator_route` only — emitted alongside `statement_type`
        //     as the trailing pair of every successful reply. The value
        //     is the canonical lowercase-snake spelling of the
        //     `Disposition` the coordinator's `RouteClassifier` chose
        //     (`duckdb_native`, `duckdb_rewrite`, `duckdb_udf`,
        //     `semantic_executor`, `control_op`, `local_stub`,
        //     `unsupported`; mirrors `backend/engine/disposition.cc`'s
        //     `DispositionToString`). The gateway surfaces this on the
        //     `Job.statistics.query.emulatorRoute` field only to
        //     loopback callers (see
        //     `gateway/middleware/emulator_route.go`) so it stays an
        //     emulator-internal debug signal the conformance harness
        //     reads back to assert per-query routing decisions, never a
        //     stable wire field for BigQuery client libraries.
        Schema        *TableSchema  `protobuf:"bytes,1,opt,name=schema,proto3" json:"schema,omitempty"`
        Cells         []*Cell       `protobuf:"bytes,2,rep,name=cells,proto3" json:"cells,omitempty"`
        DmlStats      *DmlStats     `protobuf:"bytes,3,opt,name=dml_stats,json=dmlStats,proto3" json:"dml_stats,omitempty"`
        StatementType string        `protobuf:"bytes,4,opt,name=statement_type,json=statementType,proto3" json:"statement_type,omitempty"`
        EmulatorRoute string        `protobuf:"bytes,5,opt,name=emulator_route,json=emulatorRoute,proto3" json:"emulator_route,omitempty"`
        PhaseTimings  *PhaseTimings `protobuf:"bytes,6,opt,name=phase_timings,json=phaseTimings,proto3" json:"phase_timings,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero QueryResultRow and then records the
// message info for file_emulator_proto_msgTypes[48] in its message state.
func (x *QueryResultRow) Reset() {
        *x = QueryResultRow{}
        info := &file_emulator_proto_msgTypes[48]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *QueryResultRow) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*QueryResultRow) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *QueryResultRow) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[48]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{48}.
//
// Deprecated: Use QueryResultRow.ProtoReflect.Descriptor instead.
func (*QueryResultRow) Descriptor() ([]byte, []int) {
        path := []int{48}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetSchema returns x.Schema, or nil when the receiver is nil.
func (x *QueryResultRow) GetSchema() *TableSchema {
        if x == nil {
                return nil
        }
        return x.Schema
}

// GetCells returns x.Cells, or nil when the receiver is nil.
func (x *QueryResultRow) GetCells() []*Cell {
        if x == nil {
                return nil
        }
        return x.Cells
}

// GetDmlStats returns x.DmlStats, or nil when the receiver is nil.
func (x *QueryResultRow) GetDmlStats() *DmlStats {
        if x == nil {
                return nil
        }
        return x.DmlStats
}

// GetStatementType returns x.StatementType, or "" when the receiver is nil.
func (x *QueryResultRow) GetStatementType() string {
        if x == nil {
                return ""
        }
        return x.StatementType
}

// GetEmulatorRoute returns x.EmulatorRoute, or "" when the receiver is nil.
func (x *QueryResultRow) GetEmulatorRoute() string {
        if x == nil {
                return ""
        }
        return x.EmulatorRoute
}

// GetPhaseTimings returns x.PhaseTimings, or nil when the receiver is nil.
func (x *QueryResultRow) GetPhaseTimings() *PhaseTimings {
        if x == nil {
                return nil
        }
        return x.PhaseTimings
}

// PhaseTiming is a protobuf message pairing a Name with a DurationUs value
// (proto field duration_us).
type PhaseTiming struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Name          string                 `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        DurationUs    int64                  `protobuf:"varint,2,opt,name=duration_us,json=durationUs,proto3" json:"duration_us,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero PhaseTiming and then records the
// message info for file_emulator_proto_msgTypes[49] in its message state.
func (x *PhaseTiming) Reset() {
        *x = PhaseTiming{}
        info := &file_emulator_proto_msgTypes[49]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *PhaseTiming) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*PhaseTiming) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *PhaseTiming) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[49]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{49}.
//
// Deprecated: Use PhaseTiming.ProtoReflect.Descriptor instead.
func (*PhaseTiming) Descriptor() ([]byte, []int) {
        path := []int{49}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetName returns x.Name, or "" when the receiver is nil.
func (x *PhaseTiming) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// GetDurationUs returns x.DurationUs, or 0 when the receiver is nil.
func (x *PhaseTiming) GetDurationUs() int64 {
        if x == nil {
                return 0
        }
        return x.DurationUs
}

// PhaseTimings is a protobuf message holding a slice of PhaseTiming entries.
type PhaseTimings struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Phases        []*PhaseTiming         `protobuf:"bytes,1,rep,name=phases,proto3" json:"phases,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero PhaseTimings and then records the
// message info for file_emulator_proto_msgTypes[50] in its message state.
func (x *PhaseTimings) Reset() {
        *x = PhaseTimings{}
        info := &file_emulator_proto_msgTypes[50]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *PhaseTimings) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*PhaseTimings) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *PhaseTimings) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[50]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{50}.
//
// Deprecated: Use PhaseTimings.ProtoReflect.Descriptor instead.
func (*PhaseTimings) Descriptor() ([]byte, []int) {
        path := []int{50}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetPhases returns x.Phases, or nil when the receiver is nil.
func (x *PhaseTimings) GetPhases() []*PhaseTiming {
        if x == nil {
                return nil
        }
        return x.Phases
}

// DmlStats is the engine's report of how many rows a DML statement
// (INSERT / UPDATE / DELETE / MERGE) modified. Mirrors the BigQuery
// REST `Job.statistics.query.dmlStats` shape (see
// docs/bigquery/docs/reference/rest/v2/DmlStats.md): the counts are
// 64-bit because BigQuery exposes them as decimal strings on the
// wire and the gateway formats them with `strconv.FormatInt` from
// these int64 fields.
type DmlStats struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Number of rows added by INSERT / MERGE-INSERT branches.
        InsertedRowCount int64 `protobuf:"varint,1,opt,name=inserted_row_count,json=insertedRowCount,proto3" json:"inserted_row_count,omitempty"`
        // Number of rows updated by UPDATE / MERGE-UPDATE branches.
        UpdatedRowCount int64 `protobuf:"varint,2,opt,name=updated_row_count,json=updatedRowCount,proto3" json:"updated_row_count,omitempty"`
        // Number of rows removed by DELETE / MERGE-DELETE branches.
        DeletedRowCount int64 `protobuf:"varint,3,opt,name=deleted_row_count,json=deletedRowCount,proto3" json:"deleted_row_count,omitempty"`
        unknownFields   protoimpl.UnknownFields
        sizeCache       protoimpl.SizeCache
}

// Reset overwrites *x with the zero DmlStats and then records the message
// info for file_emulator_proto_msgTypes[51] in its message state.
func (x *DmlStats) Reset() {
        *x = DmlStats{}
        info := &file_emulator_proto_msgTypes[51]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *DmlStats) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*DmlStats) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *DmlStats) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[51]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{51}.
//
// Deprecated: Use DmlStats.ProtoReflect.Descriptor instead.
func (*DmlStats) Descriptor() ([]byte, []int) {
        path := []int{51}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetInsertedRowCount returns x.InsertedRowCount, or 0 when the receiver
// is nil.
func (x *DmlStats) GetInsertedRowCount() int64 {
        if x == nil {
                return 0
        }
        return x.InsertedRowCount
}

// GetUpdatedRowCount returns x.UpdatedRowCount, or 0 when the receiver is
// nil.
func (x *DmlStats) GetUpdatedRowCount() int64 {
        if x == nil {
                return 0
        }
        return x.UpdatedRowCount
}

// GetDeletedRowCount returns x.DeletedRowCount, or 0 when the receiver is
// nil.
func (x *DmlStats) GetDeletedRowCount() int64 {
        if x == nil {
                return 0
        }
        return x.DeletedRowCount
}

// Cell is a protobuf message whose only payload is the Value oneof; the
// typed getters below return the zero value of their type whenever a
// different variant (or none) is set.
type Cell struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Wire shape: either `string_value`, `null_value`, `array`, or `struct`
        // is set. Mirrors googlesql::Value's flavor of variant.
        //
        // Types that are valid to be assigned to Value:
        //
        //        *Cell_StringValue
        //        *Cell_NullValue
        //        *Cell_Array
        //        *Cell_StructValue
        Value         isCell_Value `protobuf_oneof:"value"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero Cell and then records the message info
// for file_emulator_proto_msgTypes[52] in its message state.
func (x *Cell) Reset() {
        *x = Cell{}
        info := &file_emulator_proto_msgTypes[52]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *Cell) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*Cell) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *Cell) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[52]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{52}.
//
// Deprecated: Use Cell.ProtoReflect.Descriptor instead.
func (*Cell) Descriptor() ([]byte, []int) {
        path := []int{52}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetValue returns the raw oneof wrapper stored in x.Value, or nil when the
// receiver is nil.
func (x *Cell) GetValue() isCell_Value {
        if x == nil {
                return nil
        }
        return x.Value
}

// GetStringValue returns the wrapped string when x.Value holds a
// *Cell_StringValue; otherwise (including a nil receiver) it returns "".
func (x *Cell) GetStringValue() string {
        if x == nil {
                return ""
        }
        wrapper, ok := x.Value.(*Cell_StringValue)
        if !ok {
                return ""
        }
        return wrapper.StringValue
}

// GetNullValue returns the wrapped bool when x.Value holds a
// *Cell_NullValue; otherwise (including a nil receiver) it returns false.
func (x *Cell) GetNullValue() bool {
        if x == nil {
                return false
        }
        wrapper, ok := x.Value.(*Cell_NullValue)
        if !ok {
                return false
        }
        return wrapper.NullValue
}

// GetArray returns the wrapped *Array when x.Value holds a *Cell_Array;
// otherwise (including a nil receiver) it returns nil.
func (x *Cell) GetArray() *Array {
        if x == nil {
                return nil
        }
        wrapper, ok := x.Value.(*Cell_Array)
        if !ok {
                return nil
        }
        return wrapper.Array
}

// GetStructValue returns the wrapped *Struct when x.Value holds a
// *Cell_StructValue; otherwise (including a nil receiver) it returns nil.
func (x *Cell) GetStructValue() *Struct {
        if x == nil {
                return nil
        }
        wrapper, ok := x.Value.(*Cell_StructValue)
        if !ok {
                return nil
        }
        return wrapper.StructValue
}

// isCell_Value is implemented by every wrapper type that may be stored in
// Cell.Value; its single unexported method restricts implementations to
// this package.
type isCell_Value interface {
        isCell_Value()
}

// Cell_StringValue wraps a string for the `value` oneof (field 1).
type Cell_StringValue struct {
        StringValue string `protobuf:"bytes,1,opt,name=string_value,json=stringValue,proto3,oneof"`
}

// isCell_Value marks *Cell_StringValue as a valid Cell.Value variant.
func (*Cell_StringValue) isCell_Value() {}

// Cell_NullValue wraps a bool for the `value` oneof (field 2).
type Cell_NullValue struct {
        NullValue bool `protobuf:"varint,2,opt,name=null_value,json=nullValue,proto3,oneof"`
}

// isCell_Value marks *Cell_NullValue as a valid Cell.Value variant.
func (*Cell_NullValue) isCell_Value() {}

// Cell_Array wraps an *Array for the `value` oneof (field 3).
type Cell_Array struct {
        Array *Array `protobuf:"bytes,3,opt,name=array,proto3,oneof"`
}

// isCell_Value marks *Cell_Array as a valid Cell.Value variant.
func (*Cell_Array) isCell_Value() {}

// Cell_StructValue wraps a *Struct for the `value` oneof (field 4).
type Cell_StructValue struct {
        StructValue *Struct `protobuf:"bytes,4,opt,name=struct_value,json=structValue,proto3,oneof"`
}

// isCell_Value marks *Cell_StructValue as a valid Cell.Value variant.
func (*Cell_StructValue) isCell_Value() {}

// Array is a protobuf message holding a slice of Cell elements.
type Array struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Elements      []*Cell                `protobuf:"bytes,1,rep,name=elements,proto3" json:"elements,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero Array and then records the message info
// for file_emulator_proto_msgTypes[53] in its message state.
func (x *Array) Reset() {
        *x = Array{}
        info := &file_emulator_proto_msgTypes[53]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *Array) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*Array) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *Array) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[53]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{53}.
//
// Deprecated: Use Array.ProtoReflect.Descriptor instead.
func (*Array) Descriptor() ([]byte, []int) {
        path := []int{53}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetElements returns x.Elements, or nil when the receiver is nil.
func (x *Array) GetElements() []*Cell {
        if x == nil {
                return nil
        }
        return x.Elements
}

// Struct is a protobuf message holding a slice of Cell fields.
type Struct struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Fields        []*Cell                `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero Struct and then records the message
// info for file_emulator_proto_msgTypes[54] in its message state.
func (x *Struct) Reset() {
        *x = Struct{}
        info := &file_emulator_proto_msgTypes[54]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *Struct) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*Struct) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *Struct) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[54]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{54}.
//
// Deprecated: Use Struct.ProtoReflect.Descriptor instead.
func (*Struct) Descriptor() ([]byte, []int) {
        path := []int{54}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetFields returns x.Fields, or nil when the receiver is nil.
func (x *Struct) GetFields() []*Cell {
        if x == nil {
                return nil
        }
        return x.Fields
}

// SqlDiagnostic is a protobuf message carrying a Message and Severity
// together with position fields: Line/Column, EndLine/EndColumn, and
// StartByte/EndByte.
type SqlDiagnostic struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Line          int32                  `protobuf:"varint,1,opt,name=line,proto3" json:"line,omitempty"`
        Column        int32                  `protobuf:"varint,2,opt,name=column,proto3" json:"column,omitempty"`
        Message       string                 `protobuf:"bytes,3,opt,name=message,proto3" json:"message,omitempty"`
        Severity      string                 `protobuf:"bytes,4,opt,name=severity,proto3" json:"severity,omitempty"`
        EndLine       int32                  `protobuf:"varint,5,opt,name=end_line,json=endLine,proto3" json:"end_line,omitempty"`
        EndColumn     int32                  `protobuf:"varint,6,opt,name=end_column,json=endColumn,proto3" json:"end_column,omitempty"`
        StartByte     int32                  `protobuf:"varint,7,opt,name=start_byte,json=startByte,proto3" json:"start_byte,omitempty"`
        EndByte       int32                  `protobuf:"varint,8,opt,name=end_byte,json=endByte,proto3" json:"end_byte,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites *x with the zero SqlDiagnostic and then records the
// message info for file_emulator_proto_msgTypes[55] in its message state.
func (x *SqlDiagnostic) Reset() {
        *x = SqlDiagnostic{}
        info := &file_emulator_proto_msgTypes[55]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SqlDiagnostic) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*SqlDiagnostic) ProtoMessage() {}

// ProtoReflect returns a protoreflect.Message view of x. A nil receiver is
// served by the message info's MessageOf; otherwise the message state is
// returned, with the message info stored first if none was loaded yet.
func (x *SqlDiagnostic) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[55]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the result of file_emulator_proto_rawDescGZIP along
// with the index path []int{55}.
//
// Deprecated: Use SqlDiagnostic.ProtoReflect.Descriptor instead.
func (*SqlDiagnostic) Descriptor() ([]byte, []int) {
        path := []int{55}
        return file_emulator_proto_rawDescGZIP(), path
}

// GetLine returns x.Line, or 0 when the receiver is nil.
func (x *SqlDiagnostic) GetLine() int32 {
        if x == nil {
                return 0
        }
        return x.Line
}

// GetColumn returns x.Column, or 0 when the receiver is nil.
func (x *SqlDiagnostic) GetColumn() int32 {
        if x == nil {
                return 0
        }
        return x.Column
}

// GetMessage returns x.Message, or "" when the receiver is nil.
func (x *SqlDiagnostic) GetMessage() string {
        if x == nil {
                return ""
        }
        return x.Message
}

// GetSeverity returns x.Severity, or "" when the receiver is nil.
func (x *SqlDiagnostic) GetSeverity() string {
        if x == nil {
                return ""
        }
        return x.Severity
}

// GetEndLine returns x.EndLine, or 0 when the receiver is nil.
func (x *SqlDiagnostic) GetEndLine() int32 {
        if x == nil {
                return 0
        }
        return x.EndLine
}

// GetEndColumn returns x.EndColumn, or 0 when the receiver is nil.
func (x *SqlDiagnostic) GetEndColumn() int32 {
        if x == nil {
                return 0
        }
        return x.EndColumn
}

// GetStartByte returns x.StartByte, or 0 when the receiver is nil.
func (x *SqlDiagnostic) GetStartByte() int32 {
        if x == nil {
                return 0
        }
        return x.StartByte
}

// GetEndByte returns x.EndByte, or 0 when the receiver is nil.
func (x *SqlDiagnostic) GetEndByte() int32 {
        if x == nil {
                return 0
        }
        return x.EndByte
}

// FormatSqlRequest carries a SQL string plus three formatting knobs: Strict,
// LineLengthLimit and IndentationSpaces.
// NOTE(review): the exact meaning of a zero limit/indent is not visible here —
// confirm against emulator.proto.
type FormatSqlRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        Sql   string                 `protobuf:"bytes,1,opt,name=sql,proto3" json:"sql,omitempty"`
        // When true, use strict FormatSql (strips comments). Default is lenient.
        Strict            bool  `protobuf:"varint,2,opt,name=strict,proto3" json:"strict,omitempty"`
        LineLengthLimit   int32 `protobuf:"varint,3,opt,name=line_length_limit,json=lineLengthLimit,proto3" json:"line_length_limit,omitempty"`
        IndentationSpaces int32 `protobuf:"varint,4,opt,name=indentation_spaces,json=indentationSpaces,proto3" json:"indentation_spaces,omitempty"`
        unknownFields     protoimpl.UnknownFields
        sizeCache         protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued FormatSqlRequest and then
// stores the message info from slot 56 of file_emulator_proto_msgTypes on its
// message state.
func (x *FormatSqlRequest) Reset() {
        *x = FormatSqlRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[56])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *FormatSqlRequest) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *FormatSqlRequest's method set.
func (*FormatSqlRequest) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *FormatSqlRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[56]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{56}.
//
// Deprecated: Use FormatSqlRequest.ProtoReflect.Descriptor instead.
func (*FormatSqlRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{56}
}

// GetSql returns the Sql field, or "" when the receiver is nil.
func (x *FormatSqlRequest) GetSql() string {
        if x == nil {
                return ""
        }
        return x.Sql
}

// GetStrict returns the Strict field, or false when the receiver is nil.
func (x *FormatSqlRequest) GetStrict() bool {
        if x == nil {
                return false
        }
        return x.Strict
}

// GetLineLengthLimit returns the LineLengthLimit field, or 0 when the receiver
// is nil.
func (x *FormatSqlRequest) GetLineLengthLimit() int32 {
        if x == nil {
                return 0
        }
        return x.LineLengthLimit
}

// GetIndentationSpaces returns the IndentationSpaces field, or 0 when the
// receiver is nil.
func (x *FormatSqlRequest) GetIndentationSpaces() int32 {
        if x == nil {
                return 0
        }
        return x.IndentationSpaces
}

// FormatSqlResponse carries a FormattedSql string and a list of SqlDiagnostic
// entries.
type FormatSqlResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        FormattedSql  string                 `protobuf:"bytes,1,opt,name=formatted_sql,json=formattedSql,proto3" json:"formatted_sql,omitempty"`
        Diagnostics   []*SqlDiagnostic       `protobuf:"bytes,2,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued FormatSqlResponse and then
// stores the message info from slot 57 of file_emulator_proto_msgTypes on its
// message state.
func (x *FormatSqlResponse) Reset() {
        *x = FormatSqlResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[57])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *FormatSqlResponse) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *FormatSqlResponse's method set.
func (*FormatSqlResponse) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *FormatSqlResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[57]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{57}.
//
// Deprecated: Use FormatSqlResponse.ProtoReflect.Descriptor instead.
func (*FormatSqlResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{57}
}

// GetFormattedSql returns the FormattedSql field, or "" when the receiver is
// nil.
func (x *FormatSqlResponse) GetFormattedSql() string {
        if x == nil {
                return ""
        }
        return x.FormattedSql
}

// GetDiagnostics returns the Diagnostics slice, or nil when the receiver is
// nil.
func (x *FormatSqlResponse) GetDiagnostics() []*SqlDiagnostic {
        if x == nil {
                return nil
        }
        return x.Diagnostics
}

// ParseSqlRequest carries a single Sql string.
type ParseSqlRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Sql           string                 `protobuf:"bytes,1,opt,name=sql,proto3" json:"sql,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued ParseSqlRequest and then
// stores the message info from slot 58 of file_emulator_proto_msgTypes on its
// message state.
func (x *ParseSqlRequest) Reset() {
        *x = ParseSqlRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[58])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *ParseSqlRequest) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *ParseSqlRequest's method set.
func (*ParseSqlRequest) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *ParseSqlRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[58]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{58}.
//
// Deprecated: Use ParseSqlRequest.ProtoReflect.Descriptor instead.
func (*ParseSqlRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{58}
}

// GetSql returns the Sql field, or "" when the receiver is nil.
func (x *ParseSqlRequest) GetSql() string {
        if x == nil {
                return ""
        }
        return x.Sql
}

// ParseSqlResponse carries a list of SqlDiagnostic entries and a list of
// statement-kind strings.
type ParseSqlResponse struct {
        state          protoimpl.MessageState `protogen:"open.v1"`
        Diagnostics    []*SqlDiagnostic       `protobuf:"bytes,1,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
        StatementKinds []string               `protobuf:"bytes,2,rep,name=statement_kinds,json=statementKinds,proto3" json:"statement_kinds,omitempty"`
        unknownFields  protoimpl.UnknownFields
        sizeCache      protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued ParseSqlResponse and then
// stores the message info from slot 59 of file_emulator_proto_msgTypes on its
// message state.
func (x *ParseSqlResponse) Reset() {
        *x = ParseSqlResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[59])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *ParseSqlResponse) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *ParseSqlResponse's method set.
func (*ParseSqlResponse) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *ParseSqlResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[59]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{59}.
//
// Deprecated: Use ParseSqlResponse.ProtoReflect.Descriptor instead.
func (*ParseSqlResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{59}
}

// GetDiagnostics returns the Diagnostics slice, or nil when the receiver is
// nil.
func (x *ParseSqlResponse) GetDiagnostics() []*SqlDiagnostic {
        if x == nil {
                return nil
        }
        return x.Diagnostics
}

// GetStatementKinds returns the StatementKinds slice, or nil when the receiver
// is nil.
func (x *ParseSqlResponse) GetStatementKinds() []string {
        if x == nil {
                return nil
        }
        return x.StatementKinds
}

// TokenizeSqlRequest carries a Sql string and an IncludeComments flag.
type TokenizeSqlRequest struct {
        state           protoimpl.MessageState `protogen:"open.v1"`
        Sql             string                 `protobuf:"bytes,1,opt,name=sql,proto3" json:"sql,omitempty"`
        IncludeComments bool                   `protobuf:"varint,2,opt,name=include_comments,json=includeComments,proto3" json:"include_comments,omitempty"`
        unknownFields   protoimpl.UnknownFields
        sizeCache       protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued TokenizeSqlRequest and then
// stores the message info from slot 60 of file_emulator_proto_msgTypes on its
// message state.
func (x *TokenizeSqlRequest) Reset() {
        *x = TokenizeSqlRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[60])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *TokenizeSqlRequest) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *TokenizeSqlRequest's method set.
func (*TokenizeSqlRequest) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *TokenizeSqlRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[60]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{60}.
//
// Deprecated: Use TokenizeSqlRequest.ProtoReflect.Descriptor instead.
func (*TokenizeSqlRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{60}
}

// GetSql returns the Sql field, or "" when the receiver is nil.
func (x *TokenizeSqlRequest) GetSql() string {
        if x == nil {
                return ""
        }
        return x.Sql
}

// GetIncludeComments returns the IncludeComments field, or false when the
// receiver is nil.
func (x *TokenizeSqlRequest) GetIncludeComments() bool {
        if x == nil {
                return false
        }
        return x.IncludeComments
}

// SqlToken carries a Kind string, an Image string and two int32 values,
// StartByte and EndByte.
// NOTE(review): presumably the byte fields are offsets into the tokenized SQL;
// whether EndByte is inclusive is not visible here — confirm against
// emulator.proto.
type SqlToken struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Kind          string                 `protobuf:"bytes,1,opt,name=kind,proto3" json:"kind,omitempty"`
        Image         string                 `protobuf:"bytes,2,opt,name=image,proto3" json:"image,omitempty"`
        StartByte     int32                  `protobuf:"varint,3,opt,name=start_byte,json=startByte,proto3" json:"start_byte,omitempty"`
        EndByte       int32                  `protobuf:"varint,4,opt,name=end_byte,json=endByte,proto3" json:"end_byte,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued SqlToken and then stores
// the message info from slot 61 of file_emulator_proto_msgTypes on its message
// state.
func (x *SqlToken) Reset() {
        *x = SqlToken{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[61])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *SqlToken) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to *SqlToken's
// method set.
func (*SqlToken) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *SqlToken) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[61]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{61}.
//
// Deprecated: Use SqlToken.ProtoReflect.Descriptor instead.
func (*SqlToken) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{61}
}

// GetKind returns the Kind field, or "" when the receiver is nil.
func (x *SqlToken) GetKind() string {
        if x == nil {
                return ""
        }
        return x.Kind
}

// GetImage returns the Image field, or "" when the receiver is nil.
func (x *SqlToken) GetImage() string {
        if x == nil {
                return ""
        }
        return x.Image
}

// GetStartByte returns the StartByte field, or 0 when the receiver is nil.
func (x *SqlToken) GetStartByte() int32 {
        if x == nil {
                return 0
        }
        return x.StartByte
}

// GetEndByte returns the EndByte field, or 0 when the receiver is nil.
func (x *SqlToken) GetEndByte() int32 {
        if x == nil {
                return 0
        }
        return x.EndByte
}

// TokenizeSqlResponse carries a list of SqlToken entries and a list of
// SqlDiagnostic entries.
type TokenizeSqlResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Tokens        []*SqlToken            `protobuf:"bytes,1,rep,name=tokens,proto3" json:"tokens,omitempty"`
        Diagnostics   []*SqlDiagnostic       `protobuf:"bytes,2,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued TokenizeSqlResponse and
// then stores the message info from slot 62 of file_emulator_proto_msgTypes on
// its message state.
func (x *TokenizeSqlResponse) Reset() {
        *x = TokenizeSqlResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[62])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *TokenizeSqlResponse) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *TokenizeSqlResponse's method set.
func (*TokenizeSqlResponse) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *TokenizeSqlResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[62]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{62}.
//
// Deprecated: Use TokenizeSqlResponse.ProtoReflect.Descriptor instead.
func (*TokenizeSqlResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{62}
}

// GetTokens returns the Tokens slice, or nil when the receiver is nil.
func (x *TokenizeSqlResponse) GetTokens() []*SqlToken {
        if x == nil {
                return nil
        }
        return x.Tokens
}

// GetDiagnostics returns the Diagnostics slice, or nil when the receiver is
// nil.
func (x *TokenizeSqlResponse) GetDiagnostics() []*SqlDiagnostic {
        if x == nil {
                return nil
        }
        return x.Diagnostics
}

// CompleteSqlRequest carries a ProjectId, a DefaultDatasetId, a Sql string and
// a CursorByteOffset.
// NOTE(review): presumably CursorByteOffset is a byte index into Sql — confirm
// against emulator.proto.
type CompleteSqlRequest struct {
        state            protoimpl.MessageState `protogen:"open.v1"`
        ProjectId        string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        DefaultDatasetId string                 `protobuf:"bytes,2,opt,name=default_dataset_id,json=defaultDatasetId,proto3" json:"default_dataset_id,omitempty"`
        Sql              string                 `protobuf:"bytes,3,opt,name=sql,proto3" json:"sql,omitempty"`
        CursorByteOffset int32                  `protobuf:"varint,4,opt,name=cursor_byte_offset,json=cursorByteOffset,proto3" json:"cursor_byte_offset,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued CompleteSqlRequest and then
// stores the message info from slot 63 of file_emulator_proto_msgTypes on its
// message state.
func (x *CompleteSqlRequest) Reset() {
        *x = CompleteSqlRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[63])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *CompleteSqlRequest) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *CompleteSqlRequest's method set.
func (*CompleteSqlRequest) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *CompleteSqlRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[63]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{63}.
//
// Deprecated: Use CompleteSqlRequest.ProtoReflect.Descriptor instead.
func (*CompleteSqlRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{63}
}

// GetProjectId returns the ProjectId field, or "" when the receiver is nil.
func (x *CompleteSqlRequest) GetProjectId() string {
        if x == nil {
                return ""
        }
        return x.ProjectId
}

// GetDefaultDatasetId returns the DefaultDatasetId field, or "" when the
// receiver is nil.
func (x *CompleteSqlRequest) GetDefaultDatasetId() string {
        if x == nil {
                return ""
        }
        return x.DefaultDatasetId
}

// GetSql returns the Sql field, or "" when the receiver is nil.
func (x *CompleteSqlRequest) GetSql() string {
        if x == nil {
                return ""
        }
        return x.Sql
}

// GetCursorByteOffset returns the CursorByteOffset field, or 0 when the
// receiver is nil.
func (x *CompleteSqlRequest) GetCursorByteOffset() int32 {
        if x == nil {
                return 0
        }
        return x.CursorByteOffset
}

// SqlCompletionCandidate carries five string fields: Label, Kind, InsertText,
// Detail and Fqn.
type SqlCompletionCandidate struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Label         string                 `protobuf:"bytes,1,opt,name=label,proto3" json:"label,omitempty"`
        Kind          string                 `protobuf:"bytes,2,opt,name=kind,proto3" json:"kind,omitempty"`
        InsertText    string                 `protobuf:"bytes,3,opt,name=insert_text,json=insertText,proto3" json:"insert_text,omitempty"`
        Detail        string                 `protobuf:"bytes,4,opt,name=detail,proto3" json:"detail,omitempty"`
        Fqn           string                 `protobuf:"bytes,5,opt,name=fqn,proto3" json:"fqn,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued SqlCompletionCandidate and
// then stores the message info from slot 64 of file_emulator_proto_msgTypes on
// its message state.
func (x *SqlCompletionCandidate) Reset() {
        *x = SqlCompletionCandidate{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[64])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *SqlCompletionCandidate) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *SqlCompletionCandidate's method set.
func (*SqlCompletionCandidate) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *SqlCompletionCandidate) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[64]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{64}.
//
// Deprecated: Use SqlCompletionCandidate.ProtoReflect.Descriptor instead.
func (*SqlCompletionCandidate) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{64}
}

// GetLabel returns the Label field, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetLabel() string {
        if x == nil {
                return ""
        }
        return x.Label
}

// GetKind returns the Kind field, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetKind() string {
        if x == nil {
                return ""
        }
        return x.Kind
}

// GetInsertText returns the InsertText field, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetInsertText() string {
        if x == nil {
                return ""
        }
        return x.InsertText
}

// GetDetail returns the Detail field, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetDetail() string {
        if x == nil {
                return ""
        }
        return x.Detail
}

// GetFqn returns the Fqn field, or "" when the receiver is nil.
func (x *SqlCompletionCandidate) GetFqn() string {
        if x == nil {
                return ""
        }
        return x.Fqn
}

// CompleteSqlResponse carries a list of SqlCompletionCandidate entries plus
// two int32 values, ReplacementStart and ReplacementEnd.
// NOTE(review): presumably the replacement values bound the span of the
// request SQL a candidate replaces; units and inclusivity are not visible
// here — confirm against emulator.proto.
type CompleteSqlResponse struct {
        state            protoimpl.MessageState    `protogen:"open.v1"`
        Candidates       []*SqlCompletionCandidate `protobuf:"bytes,1,rep,name=candidates,proto3" json:"candidates,omitempty"`
        ReplacementStart int32                     `protobuf:"varint,2,opt,name=replacement_start,json=replacementStart,proto3" json:"replacement_start,omitempty"`
        ReplacementEnd   int32                     `protobuf:"varint,3,opt,name=replacement_end,json=replacementEnd,proto3" json:"replacement_end,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued CompleteSqlResponse and
// then stores the message info from slot 65 of file_emulator_proto_msgTypes on
// its message state.
func (x *CompleteSqlResponse) Reset() {
        *x = CompleteSqlResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[65])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *CompleteSqlResponse) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *CompleteSqlResponse's method set.
func (*CompleteSqlResponse) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *CompleteSqlResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[65]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{65}.
//
// Deprecated: Use CompleteSqlResponse.ProtoReflect.Descriptor instead.
func (*CompleteSqlResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{65}
}

// GetCandidates returns the Candidates slice, or nil when the receiver is nil.
func (x *CompleteSqlResponse) GetCandidates() []*SqlCompletionCandidate {
        if x == nil {
                return nil
        }
        return x.Candidates
}

// GetReplacementStart returns the ReplacementStart field, or 0 when the
// receiver is nil.
func (x *CompleteSqlResponse) GetReplacementStart() int32 {
        if x == nil {
                return 0
        }
        return x.ReplacementStart
}

// GetReplacementEnd returns the ReplacementEnd field, or 0 when the receiver
// is nil.
func (x *CompleteSqlResponse) GetReplacementEnd() int32 {
        if x == nil {
                return 0
        }
        return x.ReplacementEnd
}

// AnalyzeSqlRequest carries three string fields: ProjectId, DefaultDatasetId
// and Sql.
type AnalyzeSqlRequest struct {
        state            protoimpl.MessageState `protogen:"open.v1"`
        ProjectId        string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        DefaultDatasetId string                 `protobuf:"bytes,2,opt,name=default_dataset_id,json=defaultDatasetId,proto3" json:"default_dataset_id,omitempty"`
        Sql              string                 `protobuf:"bytes,3,opt,name=sql,proto3" json:"sql,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued AnalyzeSqlRequest and then
// stores the message info from slot 66 of file_emulator_proto_msgTypes on its
// message state.
func (x *AnalyzeSqlRequest) Reset() {
        *x = AnalyzeSqlRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[66])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *AnalyzeSqlRequest) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *AnalyzeSqlRequest's method set.
func (*AnalyzeSqlRequest) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *AnalyzeSqlRequest) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[66]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{66}.
//
// Deprecated: Use AnalyzeSqlRequest.ProtoReflect.Descriptor instead.
func (*AnalyzeSqlRequest) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{66}
}

// GetProjectId returns the ProjectId field, or "" when the receiver is nil.
func (x *AnalyzeSqlRequest) GetProjectId() string {
        if x == nil {
                return ""
        }
        return x.ProjectId
}

// GetDefaultDatasetId returns the DefaultDatasetId field, or "" when the
// receiver is nil.
func (x *AnalyzeSqlRequest) GetDefaultDatasetId() string {
        if x == nil {
                return ""
        }
        return x.DefaultDatasetId
}

// GetSql returns the Sql field, or "" when the receiver is nil.
func (x *AnalyzeSqlRequest) GetSql() string {
        if x == nil {
                return ""
        }
        return x.Sql
}

// ReferencedTable carries five string fields: ProjectId, DatasetId, TableId,
// Alias and Kind.
type ReferencedTable struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        ProjectId     string                 `protobuf:"bytes,1,opt,name=project_id,json=projectId,proto3" json:"project_id,omitempty"`
        DatasetId     string                 `protobuf:"bytes,2,opt,name=dataset_id,json=datasetId,proto3" json:"dataset_id,omitempty"`
        TableId       string                 `protobuf:"bytes,3,opt,name=table_id,json=tableId,proto3" json:"table_id,omitempty"`
        Alias         string                 `protobuf:"bytes,4,opt,name=alias,proto3" json:"alias,omitempty"`
        Kind          string                 `protobuf:"bytes,5,opt,name=kind,proto3" json:"kind,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued ReferencedTable and then
// stores the message info from slot 67 of file_emulator_proto_msgTypes on its
// message state.
func (x *ReferencedTable) Reset() {
        *x = ReferencedTable{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[67])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *ReferencedTable) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *ReferencedTable's method set.
func (*ReferencedTable) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *ReferencedTable) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[67]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{67}.
//
// Deprecated: Use ReferencedTable.ProtoReflect.Descriptor instead.
func (*ReferencedTable) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{67}
}

// GetProjectId returns the ProjectId field, or "" when the receiver is nil.
func (x *ReferencedTable) GetProjectId() string {
        if x == nil {
                return ""
        }
        return x.ProjectId
}

// GetDatasetId returns the DatasetId field, or "" when the receiver is nil.
func (x *ReferencedTable) GetDatasetId() string {
        if x == nil {
                return ""
        }
        return x.DatasetId
}

// GetTableId returns the TableId field, or "" when the receiver is nil.
func (x *ReferencedTable) GetTableId() string {
        if x == nil {
                return ""
        }
        return x.TableId
}

// GetAlias returns the Alias field, or "" when the receiver is nil.
func (x *ReferencedTable) GetAlias() string {
        if x == nil {
                return ""
        }
        return x.Alias
}

// GetKind returns the Kind field, or "" when the receiver is nil.
func (x *ReferencedTable) GetKind() string {
        if x == nil {
                return ""
        }
        return x.Kind
}

// AnalyzeSqlResponse carries a list of ReferencedTable entries, a list of
// statement-kind strings and a list of SqlDiagnostic entries.
type AnalyzeSqlResponse struct {
        state            protoimpl.MessageState `protogen:"open.v1"`
        ReferencedTables []*ReferencedTable     `protobuf:"bytes,1,rep,name=referenced_tables,json=referencedTables,proto3" json:"referenced_tables,omitempty"`
        StatementKinds   []string               `protobuf:"bytes,2,rep,name=statement_kinds,json=statementKinds,proto3" json:"statement_kinds,omitempty"`
        Diagnostics      []*SqlDiagnostic       `protobuf:"bytes,3,rep,name=diagnostics,proto3" json:"diagnostics,omitempty"`
        unknownFields    protoimpl.UnknownFields
        sizeCache        protoimpl.SizeCache
}

// Reset overwrites the receiver with a zero-valued AnalyzeSqlResponse and then
// stores the message info from slot 68 of file_emulator_proto_msgTypes on its
// message state.
func (x *AnalyzeSqlResponse) Reset() {
        *x = AnalyzeSqlResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_emulator_proto_msgTypes[68])
}

// String returns the result of protoimpl.X.MessageStringOf for the receiver.
func (x *AnalyzeSqlResponse) String() string { return protoimpl.X.MessageStringOf(x) }

// ProtoMessage has an empty body; it only adds the method to
// *AnalyzeSqlResponse's method set.
func (*AnalyzeSqlResponse) ProtoMessage() {}

// ProtoReflect returns the receiver's message state as a protoreflect.Message,
// storing the message info on that state first when none has been loaded yet.
// A nil receiver is answered through the message info's MessageOf instead.
func (x *AnalyzeSqlResponse) ProtoReflect() protoreflect.Message {
        info := &file_emulator_proto_msgTypes[68]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the value of file_emulator_proto_rawDescGZIP together
// with the index path []int{68}.
//
// Deprecated: Use AnalyzeSqlResponse.ProtoReflect.Descriptor instead.
func (*AnalyzeSqlResponse) Descriptor() ([]byte, []int) {
        raw := file_emulator_proto_rawDescGZIP()
        return raw, []int{68}
}

// GetReferencedTables returns the ReferencedTables slice, or nil when the
// receiver is nil.
func (x *AnalyzeSqlResponse) GetReferencedTables() []*ReferencedTable {
        if x == nil {
                return nil
        }
        return x.ReferencedTables
}

// GetStatementKinds returns the StatementKinds slice, or nil when the receiver
// is nil.
func (x *AnalyzeSqlResponse) GetStatementKinds() []string {
        if x == nil {
                return nil
        }
        return x.StatementKinds
}

// GetDiagnostics returns the Diagnostics slice, or nil when the receiver is
// nil.
func (x *AnalyzeSqlResponse) GetDiagnostics() []*SqlDiagnostic {
        if x == nil {
                return nil
        }
        return x.Diagnostics
}

// File_emulator_proto is the package-level protoreflect.FileDescriptor for
// emulator.proto. It is declared here without an initializer.
// NOTE(review): presumably assigned during package initialization elsewhere in
// this file — confirm before reading it from init-time code.
var File_emulator_proto protoreflect.FileDescriptor

const file_emulator_proto_rawDesc = "" +
        "\n" +
        "\x0eemulator.proto\x12\x14bigquery_emulator.v1\"J\n" +
        "\n" +
        "DatasetRef\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
        "\n" +
        "dataset_id\x18\x02 \x01(\tR\tdatasetId\"\x82\x01\n" +
        "\bTableRef\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
        "\n" +
        "dataset_id\x18\x02 \x01(\tR\tdatasetId\x12\x19\n" +
        "\btable_id\x18\x03 \x01(\tR\atableId\x12\x1d\n" +
        "\n" +
        "table_type\x18\x04 \x01(\tR\ttableType\"\xa6\x01\n" +
        "\vFieldSchema\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\x12\x12\n" +
        "\x04type\x18\x02 \x01(\tR\x04type\x12\x12\n" +
        "\x04mode\x18\x03 \x01(\tR\x04mode\x12 \n" +
        "\vdescription\x18\x04 \x01(\tR\vdescription\x129\n" +
        "\x06fields\x18\x05 \x03(\v2!.bigquery_emulator.v1.FieldSchemaR\x06fields\"H\n" +
        "\vTableSchema\x129\n" +
        "\x06fields\x18\x01 \x03(\v2!.bigquery_emulator.v1.FieldSchemaR\x06fields\"p\n" +
        "\x16RegisterDatasetRequest\x12:\n" +
        "\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\x12\x1a\n" +
        "\blocation\x18\x02 \x01(\tR\blocation\"\x19\n" +
        "\x17RegisterDatasetResponse\"\xa7\x01\n" +
        "\x12DropDatasetRequest\x12:\n" +
        "\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\x12'\n" +
        "\x0fdelete_contents\x18\x02 \x01(\bR\x0edeleteContents\x12,\n" +
        "\x12rest_metadata_json\x18\x03 \x01(\tR\x10restMetadataJson\"\x15\n" +
        "\x13DropDatasetResponse\"T\n" +
        "\x16UndeleteDatasetRequest\x12:\n" +
        "\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\"G\n" +
        "\x17UndeleteDatasetResponse\x12,\n" +
        "\x12rest_metadata_json\x18\x01 \x01(\tR\x10restMetadataJson\"4\n" +
        "\x13ListDatasetsRequest\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\"T\n" +
        "\x14ListDatasetsResponse\x12<\n" +
        "\bdatasets\x18\x01 \x03(\v2 .bigquery_emulator.v1.DatasetRefR\bdatasets\"\x87\x01\n" +
        "\x14RegisterTableRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x129\n" +
        "\x06schema\x18\x02 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\"\x17\n" +
        "\x15RegisterTableResponse\"H\n" +
        "\x10DropTableRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\"\x13\n" +
        "\x11DropTableResponse\"O\n" +
        "\x11ListTablesRequest\x12:\n" +
        "\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\"L\n" +
        "\x12ListTablesResponse\x126\n" +
        "\x06tables\x18\x01 \x03(\v2\x1e.bigquery_emulator.v1.TableRefR\x06tables\"L\n" +
        "\x14DescribeTableRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\"\xbf\x01\n" +
        "\x15DescribeTableResponse\x129\n" +
        "\x06schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12\x1d\n" +
        "\n" +
        "table_type\x18\x02 \x01(\tR\ttableType\x12\x1d\n" +
        "\n" +
        "view_query\x18\x03 \x01(\tR\tviewQuery\x12-\n" +
        "\x13view_use_legacy_sql\x18\x04 \x01(\bR\x10viewUseLegacySql\";\n" +
        "\aDataRow\x120\n" +
        "\x05cells\x18\x01 \x03(\v2\x1a.bigquery_emulator.v1.CellR\x05cells\"|\n" +
        "\x11InsertRowsRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x121\n" +
        "\x04rows\x18\x02 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\"\x14\n" +
        "\x12InsertRowsResponse\"\x89\x01\n" +
        "\x0fListRowsRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12\x1f\n" +
        "\vstart_index\x18\x02 \x01(\x03R\n" +
        "startIndex\x12\x1f\n" +
        "\vmax_results\x18\x03 \x01(\x03R\n" +
        "maxResults\"i\n" +
        "\n" +
        "RoutineRef\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
        "\n" +
        "dataset_id\x18\x02 \x01(\tR\tdatasetId\x12\x1d\n" +
        "\n" +
        "routine_id\x18\x03 \x01(\tR\troutineId\"\xf7\x01\n" +
        "\x11RoutineDescriptor\x12:\n" +
        "\aroutine\x18\x01 \x01(\v2 .bigquery_emulator.v1.RoutineRefR\aroutine\x12!\n" +
        "\froutine_type\x18\x02 \x01(\tR\vroutineType\x12\x1a\n" +
        "\blanguage\x18\x03 \x01(\tR\blanguage\x12'\n" +
        "\x0fdefinition_body\x18\x04 \x01(\tR\x0edefinitionBody\x12\x17\n" +
        "\addl_sql\x18\x05 \x01(\tR\x06ddlSql\x12%\n" +
        "\x0esignature_json\x18\x06 \x01(\tR\rsignatureJson\"Q\n" +
        "\x13ListRoutinesRequest\x12:\n" +
        "\adataset\x18\x01 \x01(\v2 .bigquery_emulator.v1.DatasetRefR\adataset\"[\n" +
        "\x14ListRoutinesResponse\x12C\n" +
        "\broutines\x18\x01 \x03(\v2'.bigquery_emulator.v1.RoutineDescriptorR\broutines\"O\n" +
        "\x11GetRoutineRequest\x12:\n" +
        "\aroutine\x18\x01 \x01(\v2 .bigquery_emulator.v1.RoutineRefR\aroutine\"W\n" +
        "\x12GetRoutineResponse\x12A\n" +
        "\aroutine\x18\x01 \x01(\v2'.bigquery_emulator.v1.RoutineDescriptorR\aroutine\"Y\n" +
        "\x14UpsertRoutineRequest\x12A\n" +
        "\aroutine\x18\x01 \x01(\v2'.bigquery_emulator.v1.RoutineDescriptorR\aroutine\"\x17\n" +
        "\x15UpsertRoutineResponse\"R\n" +
        "\x14DeleteRoutineRequest\x12:\n" +
        "\aroutine\x18\x01 \x01(\v2 .bigquery_emulator.v1.RoutineRefR\aroutine\"\x17\n" +
        "\x15DeleteRoutineResponse\"\x88\x02\n" +
        "\x0fRowAccessPolicy\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12\x1b\n" +
        "\tpolicy_id\x18\x02 \x01(\tR\bpolicyId\x12)\n" +
        "\x10filter_predicate\x18\x03 \x01(\tR\x0ffilterPredicate\x12\x1a\n" +
        "\bgrantees\x18\x04 \x03(\tR\bgrantees\x12(\n" +
        "\x10creation_time_ms\x18\x05 \x01(\x03R\x0ecreationTimeMs\x121\n" +
        "\x15last_modified_time_ms\x18\x06 \x01(\x03R\x12lastModifiedTimeMs\"]\n" +
        "\x1cUpsertRowAccessPolicyRequest\x12=\n" +
        "\x06policy\x18\x01 \x01(\v2%.bigquery_emulator.v1.RowAccessPolicyR\x06policy\"^\n" +
        "\x1dUpsertRowAccessPolicyResponse\x12=\n" +
        "\x06policy\x18\x01 \x01(\v2%.bigquery_emulator.v1.RowAccessPolicyR\x06policy\"q\n" +
        "\x1cDeleteRowAccessPolicyRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12\x1b\n" +
        "\tpolicy_id\x18\x02 \x01(\tR\bpolicyId\"\x1f\n" +
        "\x1dDeleteRowAccessPolicyResponse\"T\n" +
        "\x1cListRowAccessPoliciesRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\"b\n" +
        "\x1dListRowAccessPoliciesResponse\x12A\n" +
        "\bpolicies\x18\x01 \x03(\v2%.bigquery_emulator.v1.RowAccessPolicyR\bpolicies\"\xc4\x01\n" +
        "\x10ColumnGovernance\x12\x1f\n" +
        "\vcolumn_name\x18\x01 \x01(\tR\n" +
        "columnName\x12\x1f\n" +
        "\vpolicy_tags\x18\x02 \x03(\tR\n" +
        "policyTags\x12\x1b\n" +
        "\tmask_kind\x18\x03 \x01(\tR\bmaskKind\x12#\n" +
        "\rmask_grantees\x18\x04 \x03(\tR\fmaskGrantees\x12,\n" +
        "\x12default_mask_value\x18\x05 \x01(\tR\x10defaultMaskValue\"\x92\x01\n" +
        "\x1aSetColumnGovernanceRequest\x124\n" +
        "\x05table\x18\x01 \x01(\v2\x1e.bigquery_emulator.v1.TableRefR\x05table\x12>\n" +
        "\x06column\x18\x02 \x01(\v2&.bigquery_emulator.v1.ColumnGovernanceR\x06column\"\x1d\n" +
        "\x1bSetColumnGovernanceResponse\"\x8e\x01\n" +
        "\x10ListRowsResponse\x121\n" +
        "\x04rows\x18\x01 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\x12\x1d\n" +
        "\n" +
        "total_rows\x18\x02 \x01(\x03R\ttotalRows\x12(\n" +
        "\x10next_start_index\x18\x03 \x01(\x03R\x0enextStartIndex\"\xf5\x02\n" +
        "\fQueryRequest\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\x12,\n" +
        "\x12default_dataset_id\x18\x02 \x01(\tR\x10defaultDatasetId\x12\x10\n" +
        "\x03sql\x18\x03 \x01(\tR\x03sql\x12R\n" +
        "\n" +
        "parameters\x18\x04 \x03(\v22.bigquery_emulator.v1.QueryRequest.ParametersEntryR\n" +
        "parameters\x12$\n" +
        "\x0euse_legacy_sql\x18\x05 \x01(\bR\fuseLegacySql\x12'\n" +
        "\x0fprincipal_email\x18\x06 \x01(\tR\x0eprincipalEmail\x1ac\n" +
        "\x0fParametersEntry\x12\x10\n" +
        "\x03key\x18\x01 \x01(\tR\x03key\x12:\n" +
        "\x05value\x18\x02 \x01(\v2$.bigquery_emulator.v1.QueryParameterR\x05value:\x028\x01\"i\n" +
        "\x0eQueryParameter\x12\x1b\n" +
        "\ttype_kind\x18\x01 \x01(\tR\btypeKind\x12\x1d\n" +
        "\n" +
        "value_json\x18\x02 \x01(\tR\tvalueJson\x12\x1b\n" +
        "\ttype_json\x18\x03 \x01(\tR\btypeJson\"\x87\x01\n" +
        "\x0eDryRunResponse\x129\n" +
        "\x06schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12:\n" +
        "\x19estimated_bytes_processed\x18\x02 \x01(\x03R\x17estimatedBytesProcessed\"\xd1\x02\n" +
        "\x0eQueryResultRow\x129\n" +
        "\x06schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x120\n" +
        "\x05cells\x18\x02 \x03(\v2\x1a.bigquery_emulator.v1.CellR\x05cells\x12;\n" +
        "\tdml_stats\x18\x03 \x01(\v2\x1e.bigquery_emulator.v1.DmlStatsR\bdmlStats\x12%\n" +
        "\x0estatement_type\x18\x04 \x01(\tR\rstatementType\x12%\n" +
        "\x0eemulator_route\x18\x05 \x01(\tR\remulatorRoute\x12G\n" +
        "\rphase_timings\x18\x06 \x01(\v2\".bigquery_emulator.v1.PhaseTimingsR\fphaseTimings\"B\n" +
        "\vPhaseTiming\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n" +
        "\vduration_us\x18\x02 \x01(\x03R\n" +
        "durationUs\"I\n" +
        "\fPhaseTimings\x129\n" +
        "\x06phases\x18\x01 \x03(\v2!.bigquery_emulator.v1.PhaseTimingR\x06phases\"\x90\x01\n" +
        "\bDmlStats\x12,\n" +
        "\x12inserted_row_count\x18\x01 \x01(\x03R\x10insertedRowCount\x12*\n" +
        "\x11updated_row_count\x18\x02 \x01(\x03R\x0fupdatedRowCount\x12*\n" +
        "\x11deleted_row_count\x18\x03 \x01(\x03R\x0fdeletedRowCount\"\xcd\x01\n" +
        "\x04Cell\x12#\n" +
        "\fstring_value\x18\x01 \x01(\tH\x00R\vstringValue\x12\x1f\n" +
        "\n" +
        "null_value\x18\x02 \x01(\bH\x00R\tnullValue\x123\n" +
        "\x05array\x18\x03 \x01(\v2\x1b.bigquery_emulator.v1.ArrayH\x00R\x05array\x12A\n" +
        "\fstruct_value\x18\x04 \x01(\v2\x1c.bigquery_emulator.v1.StructH\x00R\vstructValueB\a\n" +
        "\x05value\"?\n" +
        "\x05Array\x126\n" +
        "\belements\x18\x01 \x03(\v2\x1a.bigquery_emulator.v1.CellR\belements\"<\n" +
        "\x06Struct\x122\n" +
        "\x06fields\x18\x01 \x03(\v2\x1a.bigquery_emulator.v1.CellR\x06fields\"\xe5\x01\n" +
        "\rSqlDiagnostic\x12\x12\n" +
        "\x04line\x18\x01 \x01(\x05R\x04line\x12\x16\n" +
        "\x06column\x18\x02 \x01(\x05R\x06column\x12\x18\n" +
        "\amessage\x18\x03 \x01(\tR\amessage\x12\x1a\n" +
        "\bseverity\x18\x04 \x01(\tR\bseverity\x12\x19\n" +
        "\bend_line\x18\x05 \x01(\x05R\aendLine\x12\x1d\n" +
        "\n" +
        "end_column\x18\x06 \x01(\x05R\tendColumn\x12\x1d\n" +
        "\n" +
        "start_byte\x18\a \x01(\x05R\tstartByte\x12\x19\n" +
        "\bend_byte\x18\b \x01(\x05R\aendByte\"\x97\x01\n" +
        "\x10FormatSqlRequest\x12\x10\n" +
        "\x03sql\x18\x01 \x01(\tR\x03sql\x12\x16\n" +
        "\x06strict\x18\x02 \x01(\bR\x06strict\x12*\n" +
        "\x11line_length_limit\x18\x03 \x01(\x05R\x0flineLengthLimit\x12-\n" +
        "\x12indentation_spaces\x18\x04 \x01(\x05R\x11indentationSpaces\"\x7f\n" +
        "\x11FormatSqlResponse\x12#\n" +
        "\rformatted_sql\x18\x01 \x01(\tR\fformattedSql\x12E\n" +
        "\vdiagnostics\x18\x02 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics\"#\n" +
        "\x0fParseSqlRequest\x12\x10\n" +
        "\x03sql\x18\x01 \x01(\tR\x03sql\"\x82\x01\n" +
        "\x10ParseSqlResponse\x12E\n" +
        "\vdiagnostics\x18\x01 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics\x12'\n" +
        "\x0fstatement_kinds\x18\x02 \x03(\tR\x0estatementKinds\"Q\n" +
        "\x12TokenizeSqlRequest\x12\x10\n" +
        "\x03sql\x18\x01 \x01(\tR\x03sql\x12)\n" +
        "\x10include_comments\x18\x02 \x01(\bR\x0fincludeComments\"n\n" +
        "\bSqlToken\x12\x12\n" +
        "\x04kind\x18\x01 \x01(\tR\x04kind\x12\x14\n" +
        "\x05image\x18\x02 \x01(\tR\x05image\x12\x1d\n" +
        "\n" +
        "start_byte\x18\x03 \x01(\x05R\tstartByte\x12\x19\n" +
        "\bend_byte\x18\x04 \x01(\x05R\aendByte\"\x94\x01\n" +
        "\x13TokenizeSqlResponse\x126\n" +
        "\x06tokens\x18\x01 \x03(\v2\x1e.bigquery_emulator.v1.SqlTokenR\x06tokens\x12E\n" +
        "\vdiagnostics\x18\x02 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics\"\xa1\x01\n" +
        "\x12CompleteSqlRequest\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\x12,\n" +
        "\x12default_dataset_id\x18\x02 \x01(\tR\x10defaultDatasetId\x12\x10\n" +
        "\x03sql\x18\x03 \x01(\tR\x03sql\x12,\n" +
        "\x12cursor_byte_offset\x18\x04 \x01(\x05R\x10cursorByteOffset\"\x8d\x01\n" +
        "\x16SqlCompletionCandidate\x12\x14\n" +
        "\x05label\x18\x01 \x01(\tR\x05label\x12\x12\n" +
        "\x04kind\x18\x02 \x01(\tR\x04kind\x12\x1f\n" +
        "\vinsert_text\x18\x03 \x01(\tR\n" +
        "insertText\x12\x16\n" +
        "\x06detail\x18\x04 \x01(\tR\x06detail\x12\x10\n" +
        "\x03fqn\x18\x05 \x01(\tR\x03fqn\"\xb9\x01\n" +
        "\x13CompleteSqlResponse\x12L\n" +
        "\n" +
        "candidates\x18\x01 \x03(\v2,.bigquery_emulator.v1.SqlCompletionCandidateR\n" +
        "candidates\x12+\n" +
        "\x11replacement_start\x18\x02 \x01(\x05R\x10replacementStart\x12'\n" +
        "\x0freplacement_end\x18\x03 \x01(\x05R\x0ereplacementEnd\"r\n" +
        "\x11AnalyzeSqlRequest\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\x12,\n" +
        "\x12default_dataset_id\x18\x02 \x01(\tR\x10defaultDatasetId\x12\x10\n" +
        "\x03sql\x18\x03 \x01(\tR\x03sql\"\x94\x01\n" +
        "\x0fReferencedTable\x12\x1d\n" +
        "\n" +
        "project_id\x18\x01 \x01(\tR\tprojectId\x12\x1d\n" +
        "\n" +
        "dataset_id\x18\x02 \x01(\tR\tdatasetId\x12\x19\n" +
        "\btable_id\x18\x03 \x01(\tR\atableId\x12\x14\n" +
        "\x05alias\x18\x04 \x01(\tR\x05alias\x12\x12\n" +
        "\x04kind\x18\x05 \x01(\tR\x04kind\"\xd8\x01\n" +
        "\x12AnalyzeSqlResponse\x12R\n" +
        "\x11referenced_tables\x18\x01 \x03(\v2%.bigquery_emulator.v1.ReferencedTableR\x10referencedTables\x12'\n" +
        "\x0fstatement_kinds\x18\x02 \x03(\tR\x0estatementKinds\x12E\n" +
        "\vdiagnostics\x18\x03 \x03(\v2#.bigquery_emulator.v1.SqlDiagnosticR\vdiagnostics2\xa4\x0f\n" +
        "\aCatalog\x12n\n" +
        "\x0fRegisterDataset\x12,.bigquery_emulator.v1.RegisterDatasetRequest\x1a-.bigquery_emulator.v1.RegisterDatasetResponse\x12b\n" +
        "\vDropDataset\x12(.bigquery_emulator.v1.DropDatasetRequest\x1a).bigquery_emulator.v1.DropDatasetResponse\x12n\n" +
        "\x0fUndeleteDataset\x12,.bigquery_emulator.v1.UndeleteDatasetRequest\x1a-.bigquery_emulator.v1.UndeleteDatasetResponse\x12e\n" +
        "\fListDatasets\x12).bigquery_emulator.v1.ListDatasetsRequest\x1a*.bigquery_emulator.v1.ListDatasetsResponse\x12h\n" +
        "\rRegisterTable\x12*.bigquery_emulator.v1.RegisterTableRequest\x1a+.bigquery_emulator.v1.RegisterTableResponse\x12\\\n" +
        "\tDropTable\x12&.bigquery_emulator.v1.DropTableRequest\x1a'.bigquery_emulator.v1.DropTableResponse\x12_\n" +
        "\n" +
        "ListTables\x12'.bigquery_emulator.v1.ListTablesRequest\x1a(.bigquery_emulator.v1.ListTablesResponse\x12h\n" +
        "\rDescribeTable\x12*.bigquery_emulator.v1.DescribeTableRequest\x1a+.bigquery_emulator.v1.DescribeTableResponse\x12_\n" +
        "\n" +
        "InsertRows\x12'.bigquery_emulator.v1.InsertRowsRequest\x1a(.bigquery_emulator.v1.InsertRowsResponse\x12Y\n" +
        "\bListRows\x12%.bigquery_emulator.v1.ListRowsRequest\x1a&.bigquery_emulator.v1.ListRowsResponse\x12e\n" +
        "\fListRoutines\x12).bigquery_emulator.v1.ListRoutinesRequest\x1a*.bigquery_emulator.v1.ListRoutinesResponse\x12_\n" +
        "\n" +
        "GetRoutine\x12'.bigquery_emulator.v1.GetRoutineRequest\x1a(.bigquery_emulator.v1.GetRoutineResponse\x12h\n" +
        "\rUpsertRoutine\x12*.bigquery_emulator.v1.UpsertRoutineRequest\x1a+.bigquery_emulator.v1.UpsertRoutineResponse\x12h\n" +
        "\rDeleteRoutine\x12*.bigquery_emulator.v1.DeleteRoutineRequest\x1a+.bigquery_emulator.v1.DeleteRoutineResponse\x12\x80\x01\n" +
        "\x15UpsertRowAccessPolicy\x122.bigquery_emulator.v1.UpsertRowAccessPolicyRequest\x1a3.bigquery_emulator.v1.UpsertRowAccessPolicyResponse\x12\x80\x01\n" +
        "\x15DeleteRowAccessPolicy\x122.bigquery_emulator.v1.DeleteRowAccessPolicyRequest\x1a3.bigquery_emulator.v1.DeleteRowAccessPolicyResponse\x12\x80\x01\n" +
        "\x15ListRowAccessPolicies\x122.bigquery_emulator.v1.ListRowAccessPoliciesRequest\x1a3.bigquery_emulator.v1.ListRowAccessPoliciesResponse\x12z\n" +
        "\x13SetColumnGovernance\x120.bigquery_emulator.v1.SetColumnGovernanceRequest\x1a1.bigquery_emulator.v1.SetColumnGovernanceResponse2\xb7\x01\n" +
        "\x05Query\x12R\n" +
        "\x06DryRun\x12\".bigquery_emulator.v1.QueryRequest\x1a$.bigquery_emulator.v1.DryRunResponse\x12Z\n" +
        "\fExecuteQuery\x12\".bigquery_emulator.v1.QueryRequest\x1a$.bigquery_emulator.v1.QueryResultRow0\x012\xdd\x03\n" +
        "\bSqlTools\x12Y\n" +
        "\x06Format\x12&.bigquery_emulator.v1.FormatSqlRequest\x1a'.bigquery_emulator.v1.FormatSqlResponse\x12V\n" +
        "\x05Parse\x12%.bigquery_emulator.v1.ParseSqlRequest\x1a&.bigquery_emulator.v1.ParseSqlResponse\x12_\n" +
        "\bTokenize\x12(.bigquery_emulator.v1.TokenizeSqlRequest\x1a).bigquery_emulator.v1.TokenizeSqlResponse\x12_\n" +
        "\bComplete\x12(.bigquery_emulator.v1.CompleteSqlRequest\x1a).bigquery_emulator.v1.CompleteSqlResponse\x12\\\n" +
        "\aAnalyze\x12'.bigquery_emulator.v1.AnalyzeSqlRequest\x1a(.bigquery_emulator.v1.AnalyzeSqlResponseBFZAgithub.com/vantaboard/bigquery-emulator/gateway/enginepb;enginepb\xf8\x01\x01b\x06proto3"

// State used by file_emulator_proto_rawDescGZIP to compress the raw descriptor
// only once and reuse the result.
var (
        // file_emulator_proto_rawDescOnce guards the one-time compression.
        file_emulator_proto_rawDescOnce sync.Once
        // file_emulator_proto_rawDescData holds the compressed bytes once computed.
        file_emulator_proto_rawDescData []byte
)

func file_emulator_proto_rawDescGZIP() []byte {
        file_emulator_proto_rawDescOnce.Do(func() {
                file_emulator_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_emulator_proto_rawDesc), len(file_emulator_proto_rawDesc)))
        })
        return file_emulator_proto_rawDescData
}

// file_emulator_proto_msgTypes holds 70 protoimpl.MessageInfo slots, the same
// count as the entries in file_emulator_proto_goTypes. Message methods take the
// address of their own slot (AnalyzeSqlResponse uses index 68).
var file_emulator_proto_msgTypes = make([]protoimpl.MessageInfo, 70)
// file_emulator_proto_goTypes lists one entry per message declared in
// emulator.proto: a typed nil pointer of the generated Go struct, with a
// trailing comment giving the entry's index and fully-qualified proto name.
// The last entry (69, the QueryRequest.ParametersEntry map entry) is an untyped
// nil. NOTE(review): presumably because map-entry messages get no Go struct —
// confirm.
var file_emulator_proto_goTypes = []any{
        (*DatasetRef)(nil),                    // 0: bigquery_emulator.v1.DatasetRef
        (*TableRef)(nil),                      // 1: bigquery_emulator.v1.TableRef
        (*FieldSchema)(nil),                   // 2: bigquery_emulator.v1.FieldSchema
        (*TableSchema)(nil),                   // 3: bigquery_emulator.v1.TableSchema
        (*RegisterDatasetRequest)(nil),        // 4: bigquery_emulator.v1.RegisterDatasetRequest
        (*RegisterDatasetResponse)(nil),       // 5: bigquery_emulator.v1.RegisterDatasetResponse
        (*DropDatasetRequest)(nil),            // 6: bigquery_emulator.v1.DropDatasetRequest
        (*DropDatasetResponse)(nil),           // 7: bigquery_emulator.v1.DropDatasetResponse
        (*UndeleteDatasetRequest)(nil),        // 8: bigquery_emulator.v1.UndeleteDatasetRequest
        (*UndeleteDatasetResponse)(nil),       // 9: bigquery_emulator.v1.UndeleteDatasetResponse
        (*ListDatasetsRequest)(nil),           // 10: bigquery_emulator.v1.ListDatasetsRequest
        (*ListDatasetsResponse)(nil),          // 11: bigquery_emulator.v1.ListDatasetsResponse
        (*RegisterTableRequest)(nil),          // 12: bigquery_emulator.v1.RegisterTableRequest
        (*RegisterTableResponse)(nil),         // 13: bigquery_emulator.v1.RegisterTableResponse
        (*DropTableRequest)(nil),              // 14: bigquery_emulator.v1.DropTableRequest
        (*DropTableResponse)(nil),             // 15: bigquery_emulator.v1.DropTableResponse
        (*ListTablesRequest)(nil),             // 16: bigquery_emulator.v1.ListTablesRequest
        (*ListTablesResponse)(nil),            // 17: bigquery_emulator.v1.ListTablesResponse
        (*DescribeTableRequest)(nil),          // 18: bigquery_emulator.v1.DescribeTableRequest
        (*DescribeTableResponse)(nil),         // 19: bigquery_emulator.v1.DescribeTableResponse
        (*DataRow)(nil),                       // 20: bigquery_emulator.v1.DataRow
        (*InsertRowsRequest)(nil),             // 21: bigquery_emulator.v1.InsertRowsRequest
        (*InsertRowsResponse)(nil),            // 22: bigquery_emulator.v1.InsertRowsResponse
        (*ListRowsRequest)(nil),               // 23: bigquery_emulator.v1.ListRowsRequest
        (*RoutineRef)(nil),                    // 24: bigquery_emulator.v1.RoutineRef
        (*RoutineDescriptor)(nil),             // 25: bigquery_emulator.v1.RoutineDescriptor
        (*ListRoutinesRequest)(nil),           // 26: bigquery_emulator.v1.ListRoutinesRequest
        (*ListRoutinesResponse)(nil),          // 27: bigquery_emulator.v1.ListRoutinesResponse
        (*GetRoutineRequest)(nil),             // 28: bigquery_emulator.v1.GetRoutineRequest
        (*GetRoutineResponse)(nil),            // 29: bigquery_emulator.v1.GetRoutineResponse
        (*UpsertRoutineRequest)(nil),          // 30: bigquery_emulator.v1.UpsertRoutineRequest
        (*UpsertRoutineResponse)(nil),         // 31: bigquery_emulator.v1.UpsertRoutineResponse
        (*DeleteRoutineRequest)(nil),          // 32: bigquery_emulator.v1.DeleteRoutineRequest
        (*DeleteRoutineResponse)(nil),         // 33: bigquery_emulator.v1.DeleteRoutineResponse
        (*RowAccessPolicy)(nil),               // 34: bigquery_emulator.v1.RowAccessPolicy
        (*UpsertRowAccessPolicyRequest)(nil),  // 35: bigquery_emulator.v1.UpsertRowAccessPolicyRequest
        (*UpsertRowAccessPolicyResponse)(nil), // 36: bigquery_emulator.v1.UpsertRowAccessPolicyResponse
        (*DeleteRowAccessPolicyRequest)(nil),  // 37: bigquery_emulator.v1.DeleteRowAccessPolicyRequest
        (*DeleteRowAccessPolicyResponse)(nil), // 38: bigquery_emulator.v1.DeleteRowAccessPolicyResponse
        (*ListRowAccessPoliciesRequest)(nil),  // 39: bigquery_emulator.v1.ListRowAccessPoliciesRequest
        (*ListRowAccessPoliciesResponse)(nil), // 40: bigquery_emulator.v1.ListRowAccessPoliciesResponse
        (*ColumnGovernance)(nil),              // 41: bigquery_emulator.v1.ColumnGovernance
        (*SetColumnGovernanceRequest)(nil),    // 42: bigquery_emulator.v1.SetColumnGovernanceRequest
        (*SetColumnGovernanceResponse)(nil),   // 43: bigquery_emulator.v1.SetColumnGovernanceResponse
        (*ListRowsResponse)(nil),              // 44: bigquery_emulator.v1.ListRowsResponse
        (*QueryRequest)(nil),                  // 45: bigquery_emulator.v1.QueryRequest
        (*QueryParameter)(nil),                // 46: bigquery_emulator.v1.QueryParameter
        (*DryRunResponse)(nil),                // 47: bigquery_emulator.v1.DryRunResponse
        (*QueryResultRow)(nil),                // 48: bigquery_emulator.v1.QueryResultRow
        (*PhaseTiming)(nil),                   // 49: bigquery_emulator.v1.PhaseTiming
        (*PhaseTimings)(nil),                  // 50: bigquery_emulator.v1.PhaseTimings
        (*DmlStats)(nil),                      // 51: bigquery_emulator.v1.DmlStats
        (*Cell)(nil),                          // 52: bigquery_emulator.v1.Cell
        (*Array)(nil),                         // 53: bigquery_emulator.v1.Array
        (*Struct)(nil),                        // 54: bigquery_emulator.v1.Struct
        (*SqlDiagnostic)(nil),                 // 55: bigquery_emulator.v1.SqlDiagnostic
        (*FormatSqlRequest)(nil),              // 56: bigquery_emulator.v1.FormatSqlRequest
        (*FormatSqlResponse)(nil),             // 57: bigquery_emulator.v1.FormatSqlResponse
        (*ParseSqlRequest)(nil),               // 58: bigquery_emulator.v1.ParseSqlRequest
        (*ParseSqlResponse)(nil),              // 59: bigquery_emulator.v1.ParseSqlResponse
        (*TokenizeSqlRequest)(nil),            // 60: bigquery_emulator.v1.TokenizeSqlRequest
        (*SqlToken)(nil),                      // 61: bigquery_emulator.v1.SqlToken
        (*TokenizeSqlResponse)(nil),           // 62: bigquery_emulator.v1.TokenizeSqlResponse
        (*CompleteSqlRequest)(nil),            // 63: bigquery_emulator.v1.CompleteSqlRequest
        (*SqlCompletionCandidate)(nil),        // 64: bigquery_emulator.v1.SqlCompletionCandidate
        (*CompleteSqlResponse)(nil),           // 65: bigquery_emulator.v1.CompleteSqlResponse
        (*AnalyzeSqlRequest)(nil),             // 66: bigquery_emulator.v1.AnalyzeSqlRequest
        (*ReferencedTable)(nil),               // 67: bigquery_emulator.v1.ReferencedTable
        (*AnalyzeSqlResponse)(nil),            // 68: bigquery_emulator.v1.AnalyzeSqlResponse
        nil,                                   // 69: bigquery_emulator.v1.QueryRequest.ParametersEntry
}
var file_emulator_proto_depIdxs = []int32{
        2,  // 0: bigquery_emulator.v1.FieldSchema.fields:type_name -> bigquery_emulator.v1.FieldSchema
        2,  // 1: bigquery_emulator.v1.TableSchema.fields:type_name -> bigquery_emulator.v1.FieldSchema
        0,  // 2: bigquery_emulator.v1.RegisterDatasetRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
        0,  // 3: bigquery_emulator.v1.DropDatasetRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
        0,  // 4: bigquery_emulator.v1.UndeleteDatasetRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
        0,  // 5: bigquery_emulator.v1.ListDatasetsResponse.datasets:type_name -> bigquery_emulator.v1.DatasetRef
        1,  // 6: bigquery_emulator.v1.RegisterTableRequest.table:type_name -> bigquery_emulator.v1.TableRef
        3,  // 7: bigquery_emulator.v1.RegisterTableRequest.schema:type_name -> bigquery_emulator.v1.TableSchema
        1,  // 8: bigquery_emulator.v1.DropTableRequest.table:type_name -> bigquery_emulator.v1.TableRef
        0,  // 9: bigquery_emulator.v1.ListTablesRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
        1,  // 10: bigquery_emulator.v1.ListTablesResponse.tables:type_name -> bigquery_emulator.v1.TableRef
        1,  // 11: bigquery_emulator.v1.DescribeTableRequest.table:type_name -> bigquery_emulator.v1.TableRef
        3,  // 12: bigquery_emulator.v1.DescribeTableResponse.schema:type_name -> bigquery_emulator.v1.TableSchema
        52, // 13: bigquery_emulator.v1.DataRow.cells:type_name -> bigquery_emulator.v1.Cell
        1,  // 14: bigquery_emulator.v1.InsertRowsRequest.table:type_name -> bigquery_emulator.v1.TableRef
        20, // 15: bigquery_emulator.v1.InsertRowsRequest.rows:type_name -> bigquery_emulator.v1.DataRow
        1,  // 16: bigquery_emulator.v1.ListRowsRequest.table:type_name -> bigquery_emulator.v1.TableRef
        24, // 17: bigquery_emulator.v1.RoutineDescriptor.routine:type_name -> bigquery_emulator.v1.RoutineRef
        0,  // 18: bigquery_emulator.v1.ListRoutinesRequest.dataset:type_name -> bigquery_emulator.v1.DatasetRef
        25, // 19: bigquery_emulator.v1.ListRoutinesResponse.routines:type_name -> bigquery_emulator.v1.RoutineDescriptor
        24, // 20: bigquery_emulator.v1.GetRoutineRequest.routine:type_name -> bigquery_emulator.v1.RoutineRef
        25, // 21: bigquery_emulator.v1.GetRoutineResponse.routine:type_name -> bigquery_emulator.v1.RoutineDescriptor
        25, // 22: bigquery_emulator.v1.UpsertRoutineRequest.routine:type_name -> bigquery_emulator.v1.RoutineDescriptor
        24, // 23: bigquery_emulator.v1.DeleteRoutineRequest.routine:type_name -> bigquery_emulator.v1.RoutineRef
        1,  // 24: bigquery_emulator.v1.RowAccessPolicy.table:type_name -> bigquery_emulator.v1.TableRef
        34, // 25: bigquery_emulator.v1.UpsertRowAccessPolicyRequest.policy:type_name -> bigquery_emulator.v1.RowAccessPolicy
        34, // 26: bigquery_emulator.v1.UpsertRowAccessPolicyResponse.policy:type_name -> bigquery_emulator.v1.RowAccessPolicy
        1,  // 27: bigquery_emulator.v1.DeleteRowAccessPolicyRequest.table:type_name -> bigquery_emulator.v1.TableRef
        1,  // 28: bigquery_emulator.v1.ListRowAccessPoliciesRequest.table:type_name -> bigquery_emulator.v1.TableRef
        34, // 29: bigquery_emulator.v1.ListRowAccessPoliciesResponse.policies:type_name -> bigquery_emulator.v1.RowAccessPolicy
        1,  // 30: bigquery_emulator.v1.SetColumnGovernanceRequest.table:type_name -> bigquery_emulator.v1.TableRef
        41, // 31: bigquery_emulator.v1.SetColumnGovernanceRequest.column:type_name -> bigquery_emulator.v1.ColumnGovernance
        20, // 32: bigquery_emulator.v1.ListRowsResponse.rows:type_name -> bigquery_emulator.v1.DataRow
        69, // 33: bigquery_emulator.v1.QueryRequest.parameters:type_name -> bigquery_emulator.v1.QueryRequest.ParametersEntry
        3,  // 34: bigquery_emulator.v1.DryRunResponse.schema:type_name -> bigquery_emulator.v1.TableSchema
        3,  // 35: bigquery_emulator.v1.QueryResultRow.schema:type_name -> bigquery_emulator.v1.TableSchema
        52, // 36: bigquery_emulator.v1.QueryResultRow.cells:type_name -> bigquery_emulator.v1.Cell
        51, // 37: bigquery_emulator.v1.QueryResultRow.dml_stats:type_name -> bigquery_emulator.v1.DmlStats
        50, // 38: bigquery_emulator.v1.QueryResultRow.phase_timings:type_name -> bigquery_emulator.v1.PhaseTimings
        49, // 39: bigquery_emulator.v1.PhaseTimings.phases:type_name -> bigquery_emulator.v1.PhaseTiming
        53, // 40: bigquery_emulator.v1.Cell.array:type_name -> bigquery_emulator.v1.Array
        54, // 41: bigquery_emulator.v1.Cell.struct_value:type_name -> bigquery_emulator.v1.Struct
        52, // 42: bigquery_emulator.v1.Array.elements:type_name -> bigquery_emulator.v1.Cell
        52, // 43: bigquery_emulator.v1.Struct.fields:type_name -> bigquery_emulator.v1.Cell
        55, // 44: bigquery_emulator.v1.FormatSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
        55, // 45: bigquery_emulator.v1.ParseSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
        61, // 46: bigquery_emulator.v1.TokenizeSqlResponse.tokens:type_name -> bigquery_emulator.v1.SqlToken
        55, // 47: bigquery_emulator.v1.TokenizeSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
        64, // 48: bigquery_emulator.v1.CompleteSqlResponse.candidates:type_name -> bigquery_emulator.v1.SqlCompletionCandidate
        67, // 49: bigquery_emulator.v1.AnalyzeSqlResponse.referenced_tables:type_name -> bigquery_emulator.v1.ReferencedTable
        55, // 50: bigquery_emulator.v1.AnalyzeSqlResponse.diagnostics:type_name -> bigquery_emulator.v1.SqlDiagnostic
        46, // 51: bigquery_emulator.v1.QueryRequest.ParametersEntry.value:type_name -> bigquery_emulator.v1.QueryParameter
        4,  // 52: bigquery_emulator.v1.Catalog.RegisterDataset:input_type -> bigquery_emulator.v1.RegisterDatasetRequest
        6,  // 53: bigquery_emulator.v1.Catalog.DropDataset:input_type -> bigquery_emulator.v1.DropDatasetRequest
        8,  // 54: bigquery_emulator.v1.Catalog.UndeleteDataset:input_type -> bigquery_emulator.v1.UndeleteDatasetRequest
        10, // 55: bigquery_emulator.v1.Catalog.ListDatasets:input_type -> bigquery_emulator.v1.ListDatasetsRequest
        12, // 56: bigquery_emulator.v1.Catalog.RegisterTable:input_type -> bigquery_emulator.v1.RegisterTableRequest
        14, // 57: bigquery_emulator.v1.Catalog.DropTable:input_type -> bigquery_emulator.v1.DropTableRequest
        16, // 58: bigquery_emulator.v1.Catalog.ListTables:input_type -> bigquery_emulator.v1.ListTablesRequest
        18, // 59: bigquery_emulator.v1.Catalog.DescribeTable:input_type -> bigquery_emulator.v1.DescribeTableRequest
        21, // 60: bigquery_emulator.v1.Catalog.InsertRows:input_type -> bigquery_emulator.v1.InsertRowsRequest
        23, // 61: bigquery_emulator.v1.Catalog.ListRows:input_type -> bigquery_emulator.v1.ListRowsRequest
        26, // 62: bigquery_emulator.v1.Catalog.ListRoutines:input_type -> bigquery_emulator.v1.ListRoutinesRequest
        28, // 63: bigquery_emulator.v1.Catalog.GetRoutine:input_type -> bigquery_emulator.v1.GetRoutineRequest
        30, // 64: bigquery_emulator.v1.Catalog.UpsertRoutine:input_type -> bigquery_emulator.v1.UpsertRoutineRequest
        32, // 65: bigquery_emulator.v1.Catalog.DeleteRoutine:input_type -> bigquery_emulator.v1.DeleteRoutineRequest
        35, // 66: bigquery_emulator.v1.Catalog.UpsertRowAccessPolicy:input_type -> bigquery_emulator.v1.UpsertRowAccessPolicyRequest
        37, // 67: bigquery_emulator.v1.Catalog.DeleteRowAccessPolicy:input_type -> bigquery_emulator.v1.DeleteRowAccessPolicyRequest
        39, // 68: bigquery_emulator.v1.Catalog.ListRowAccessPolicies:input_type -> bigquery_emulator.v1.ListRowAccessPoliciesRequest
        42, // 69: bigquery_emulator.v1.Catalog.SetColumnGovernance:input_type -> bigquery_emulator.v1.SetColumnGovernanceRequest
        45, // 70: bigquery_emulator.v1.Query.DryRun:input_type -> bigquery_emulator.v1.QueryRequest
        45, // 71: bigquery_emulator.v1.Query.ExecuteQuery:input_type -> bigquery_emulator.v1.QueryRequest
        56, // 72: bigquery_emulator.v1.SqlTools.Format:input_type -> bigquery_emulator.v1.FormatSqlRequest
        58, // 73: bigquery_emulator.v1.SqlTools.Parse:input_type -> bigquery_emulator.v1.ParseSqlRequest
        60, // 74: bigquery_emulator.v1.SqlTools.Tokenize:input_type -> bigquery_emulator.v1.TokenizeSqlRequest
        63, // 75: bigquery_emulator.v1.SqlTools.Complete:input_type -> bigquery_emulator.v1.CompleteSqlRequest
        66, // 76: bigquery_emulator.v1.SqlTools.Analyze:input_type -> bigquery_emulator.v1.AnalyzeSqlRequest
        5,  // 77: bigquery_emulator.v1.Catalog.RegisterDataset:output_type -> bigquery_emulator.v1.RegisterDatasetResponse
        7,  // 78: bigquery_emulator.v1.Catalog.DropDataset:output_type -> bigquery_emulator.v1.DropDatasetResponse
        9,  // 79: bigquery_emulator.v1.Catalog.UndeleteDataset:output_type -> bigquery_emulator.v1.UndeleteDatasetResponse
        11, // 80: bigquery_emulator.v1.Catalog.ListDatasets:output_type -> bigquery_emulator.v1.ListDatasetsResponse
        13, // 81: bigquery_emulator.v1.Catalog.RegisterTable:output_type -> bigquery_emulator.v1.RegisterTableResponse
        15, // 82: bigquery_emulator.v1.Catalog.DropTable:output_type -> bigquery_emulator.v1.DropTableResponse
        17, // 83: bigquery_emulator.v1.Catalog.ListTables:output_type -> bigquery_emulator.v1.ListTablesResponse
        19, // 84: bigquery_emulator.v1.Catalog.DescribeTable:output_type -> bigquery_emulator.v1.DescribeTableResponse
        22, // 85: bigquery_emulator.v1.Catalog.InsertRows:output_type -> bigquery_emulator.v1.InsertRowsResponse
        44, // 86: bigquery_emulator.v1.Catalog.ListRows:output_type -> bigquery_emulator.v1.ListRowsResponse
        27, // 87: bigquery_emulator.v1.Catalog.ListRoutines:output_type -> bigquery_emulator.v1.ListRoutinesResponse
        29, // 88: bigquery_emulator.v1.Catalog.GetRoutine:output_type -> bigquery_emulator.v1.GetRoutineResponse
        31, // 89: bigquery_emulator.v1.Catalog.UpsertRoutine:output_type -> bigquery_emulator.v1.UpsertRoutineResponse
        33, // 90: bigquery_emulator.v1.Catalog.DeleteRoutine:output_type -> bigquery_emulator.v1.DeleteRoutineResponse
        36, // 91: bigquery_emulator.v1.Catalog.UpsertRowAccessPolicy:output_type -> bigquery_emulator.v1.UpsertRowAccessPolicyResponse
        38, // 92: bigquery_emulator.v1.Catalog.DeleteRowAccessPolicy:output_type -> bigquery_emulator.v1.DeleteRowAccessPolicyResponse
        40, // 93: bigquery_emulator.v1.Catalog.ListRowAccessPolicies:output_type -> bigquery_emulator.v1.ListRowAccessPoliciesResponse
        43, // 94: bigquery_emulator.v1.Catalog.SetColumnGovernance:output_type -> bigquery_emulator.v1.SetColumnGovernanceResponse
        47, // 95: bigquery_emulator.v1.Query.DryRun:output_type -> bigquery_emulator.v1.DryRunResponse
        48, // 96: bigquery_emulator.v1.Query.ExecuteQuery:output_type -> bigquery_emulator.v1.QueryResultRow
        57, // 97: bigquery_emulator.v1.SqlTools.Format:output_type -> bigquery_emulator.v1.FormatSqlResponse
        59, // 98: bigquery_emulator.v1.SqlTools.Parse:output_type -> bigquery_emulator.v1.ParseSqlResponse
        62, // 99: bigquery_emulator.v1.SqlTools.Tokenize:output_type -> bigquery_emulator.v1.TokenizeSqlResponse
        65, // 100: bigquery_emulator.v1.SqlTools.Complete:output_type -> bigquery_emulator.v1.CompleteSqlResponse
        68, // 101: bigquery_emulator.v1.SqlTools.Analyze:output_type -> bigquery_emulator.v1.AnalyzeSqlResponse
        77, // [77:102] is the sub-list for method output_type
        52, // [52:77] is the sub-list for method input_type
        52, // [52:52] is the sub-list for extension type_name
        52, // [52:52] is the sub-list for extension extendee
        0,  // [0:52] is the sub-list for field type_name
}

// init triggers construction of the emulator.proto descriptors when the
// package is loaded.
func init() {
        file_emulator_proto_init()
}
// file_emulator_proto_init builds the emulator.proto file descriptor and
// message type information and stores the result in File_emulator_proto.
// Calls made after File_emulator_proto has been set return immediately.
func file_emulator_proto_init() {
        if File_emulator_proto != nil {
                return
        }
        // Wrapper types for the oneof carried by message index 52 (the Cell_*
        // variants); set before Build() consumes file_emulator_proto_msgTypes.
        file_emulator_proto_msgTypes[52].OneofWrappers = []any{
                (*Cell_StringValue)(nil),
                (*Cell_NullValue)(nil),
                (*Cell_Array)(nil),
                (*Cell_StructValue)(nil),
        }
        // x is a throwaway local type: reflecting on it yields this package's
        // import path for GoPackagePath.
        type x struct{}
        out := protoimpl.TypeBuilder{
                File: protoimpl.DescBuilder{
                        GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
                        // Byte-slice view over the raw descriptor string's backing data.
                        RawDescriptor: unsafe.Slice(unsafe.StringData(file_emulator_proto_rawDesc), len(file_emulator_proto_rawDesc)),
                        NumEnums:      0,
                        NumMessages:   70,
                        NumExtensions: 0,
                        NumServices:   3,
                },
                GoTypes:           file_emulator_proto_goTypes,
                DependencyIndexes: file_emulator_proto_depIdxs,
                MessageInfos:      file_emulator_proto_msgTypes,
        }.Build()
        File_emulator_proto = out.File
        // The build-time tables are dropped once the descriptor exists.
        // NOTE(review): presumably so they can be garbage-collected — confirm
        // nothing else reads them after init.
        file_emulator_proto_goTypes = nil
        file_emulator_proto_depIdxs = nil
}

// emulator.proto is the internal contract between the Go REST gateway and
// the C++ engine. It is intentionally minimal: the gateway owns the
// public-facing BigQuery REST shape, and only forwards the bits that
// actually need GoogleSQL to do their job.
//
// Code generation is wired up via buf (see ../buf.gen.yaml) for the Go
// side and via Bazel (see ./BUILD.bazel) for the C++ side. The Go
// stubs land in gateway/enginepb/ and are checked in so `go build`
// works without an extra codegen step; the C++ stubs are generated
// fresh into the Bazel output tree.

// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.2
// - protoc             v7.35.0
// source: emulator.proto

package enginepb

import (
        context "context"
        grpc "google.golang.org/grpc"
        codes "google.golang.org/grpc/codes"
        status "google.golang.org/grpc/status"
)

// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// The constant is bound to the blank identifier, so it exists only to
// reference grpc.SupportPackageIsVersion9; the build fails if the linked
// grpc package does not define that symbol.
// Requires gRPC-Go v1.64.0 or later.
const _ = grpc.SupportPackageIsVersion9

// Fully-qualified gRPC method names for the Catalog service, written as
// "/<proto package>.<service>/<method>". The client stubs pass these to
// Invoke and the server handlers report them in UnaryServerInfo.FullMethod.
const (
        Catalog_RegisterDataset_FullMethodName       = "/bigquery_emulator.v1.Catalog/RegisterDataset"
        Catalog_DropDataset_FullMethodName           = "/bigquery_emulator.v1.Catalog/DropDataset"
        Catalog_UndeleteDataset_FullMethodName       = "/bigquery_emulator.v1.Catalog/UndeleteDataset"
        Catalog_ListDatasets_FullMethodName          = "/bigquery_emulator.v1.Catalog/ListDatasets"
        Catalog_RegisterTable_FullMethodName         = "/bigquery_emulator.v1.Catalog/RegisterTable"
        Catalog_DropTable_FullMethodName             = "/bigquery_emulator.v1.Catalog/DropTable"
        Catalog_ListTables_FullMethodName            = "/bigquery_emulator.v1.Catalog/ListTables"
        Catalog_DescribeTable_FullMethodName         = "/bigquery_emulator.v1.Catalog/DescribeTable"
        Catalog_InsertRows_FullMethodName            = "/bigquery_emulator.v1.Catalog/InsertRows"
        Catalog_ListRows_FullMethodName              = "/bigquery_emulator.v1.Catalog/ListRows"
        Catalog_ListRoutines_FullMethodName          = "/bigquery_emulator.v1.Catalog/ListRoutines"
        Catalog_GetRoutine_FullMethodName            = "/bigquery_emulator.v1.Catalog/GetRoutine"
        Catalog_UpsertRoutine_FullMethodName         = "/bigquery_emulator.v1.Catalog/UpsertRoutine"
        Catalog_DeleteRoutine_FullMethodName         = "/bigquery_emulator.v1.Catalog/DeleteRoutine"
        Catalog_UpsertRowAccessPolicy_FullMethodName = "/bigquery_emulator.v1.Catalog/UpsertRowAccessPolicy"
        Catalog_DeleteRowAccessPolicy_FullMethodName = "/bigquery_emulator.v1.Catalog/DeleteRowAccessPolicy"
        Catalog_ListRowAccessPolicies_FullMethodName = "/bigquery_emulator.v1.Catalog/ListRowAccessPolicies"
        Catalog_SetColumnGovernance_FullMethodName   = "/bigquery_emulator.v1.Catalog/SetColumnGovernance"
)

// CatalogClient is the client API for Catalog service.
//
// Every method is a unary call: it takes a context, one request message and
// optional grpc.CallOptions, and returns one response message or an error.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type CatalogClient interface {
        // Dataset registration and removal.
        RegisterDataset(ctx context.Context, in *RegisterDatasetRequest, opts ...grpc.CallOption) (*RegisterDatasetResponse, error)
        DropDataset(ctx context.Context, in *DropDatasetRequest, opts ...grpc.CallOption) (*DropDatasetResponse, error)
        UndeleteDataset(ctx context.Context, in *UndeleteDatasetRequest, opts ...grpc.CallOption) (*UndeleteDatasetResponse, error)
        // Lists every dataset registered under `project_id`. The gateway's
        // `datasets.list` REST handler delegates here; the response is
        // shaped as a single page (no continuation cursor today, the
        // emulator never has enough datasets to need one).
        ListDatasets(ctx context.Context, in *ListDatasetsRequest, opts ...grpc.CallOption) (*ListDatasetsResponse, error)
        // Table registration and removal.
        RegisterTable(ctx context.Context, in *RegisterTableRequest, opts ...grpc.CallOption) (*RegisterTableResponse, error)
        DropTable(ctx context.Context, in *DropTableRequest, opts ...grpc.CallOption) (*DropTableResponse, error)
        // Lists every table registered under `dataset`. The gateway's
        // `tables.list` REST handler delegates here; pagination contract
        // matches ListDatasets.
        ListTables(ctx context.Context, in *ListTablesRequest, opts ...grpc.CallOption) (*ListTablesResponse, error)
        DescribeTable(ctx context.Context, in *DescribeTableRequest, opts ...grpc.CallOption) (*DescribeTableResponse, error)
        // Row-level access. `InsertRows` delegates to `Storage::AppendRows`
        // and is the engine side of `tabledata.insertAll`. `ListRows` is
        // the engine side of `tabledata.list` and returns a single
        // (possibly empty) page of rows plus the total row count so the
        // gateway can synthesize BigQuery's pageToken semantics.
        InsertRows(ctx context.Context, in *InsertRowsRequest, opts ...grpc.CallOption) (*InsertRowsResponse, error)
        ListRows(ctx context.Context, in *ListRowsRequest, opts ...grpc.CallOption) (*ListRowsResponse, error)
        // Routine metadata persisted in `catalog.duckdb` for REST
        // round-trip and cross-restart SQL UDF / TVF / procedure replay.
        ListRoutines(ctx context.Context, in *ListRoutinesRequest, opts ...grpc.CallOption) (*ListRoutinesResponse, error)
        GetRoutine(ctx context.Context, in *GetRoutineRequest, opts ...grpc.CallOption) (*GetRoutineResponse, error)
        UpsertRoutine(ctx context.Context, in *UpsertRoutineRequest, opts ...grpc.CallOption) (*UpsertRoutineResponse, error)
        DeleteRoutine(ctx context.Context, in *DeleteRoutineRequest, opts ...grpc.CallOption) (*DeleteRoutineResponse, error)
        // Row-access policies and column-level security metadata.
        UpsertRowAccessPolicy(ctx context.Context, in *UpsertRowAccessPolicyRequest, opts ...grpc.CallOption) (*UpsertRowAccessPolicyResponse, error)
        DeleteRowAccessPolicy(ctx context.Context, in *DeleteRowAccessPolicyRequest, opts ...grpc.CallOption) (*DeleteRowAccessPolicyResponse, error)
        ListRowAccessPolicies(ctx context.Context, in *ListRowAccessPoliciesRequest, opts ...grpc.CallOption) (*ListRowAccessPoliciesResponse, error)
        SetColumnGovernance(ctx context.Context, in *SetColumnGovernanceRequest, opts ...grpc.CallOption) (*SetColumnGovernanceResponse, error)
}

// catalogClient is the CatalogClient implementation returned by
// NewCatalogClient; each method forwards to cc.Invoke.
type catalogClient struct {
        cc grpc.ClientConnInterface // connection every RPC is issued on
}

// NewCatalogClient returns a CatalogClient whose calls are issued on cc.
func NewCatalogClient(cc grpc.ClientConnInterface) CatalogClient {
        client := &catalogClient{cc: cc}
        return client
}

// RegisterDataset performs the unary RegisterDataset RPC on the wrapped
// connection. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. A failed Invoke yields (nil, err).
func (c *catalogClient) RegisterDataset(ctx context.Context, in *RegisterDatasetRequest, opts ...grpc.CallOption) (*RegisterDatasetResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &RegisterDatasetResponse{}
        if err := c.cc.Invoke(ctx, Catalog_RegisterDataset_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// DropDataset performs the unary DropDataset RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) DropDataset(ctx context.Context, in *DropDatasetRequest, opts ...grpc.CallOption) (*DropDatasetResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &DropDatasetResponse{}
        if err := c.cc.Invoke(ctx, Catalog_DropDataset_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// UndeleteDataset performs the unary UndeleteDataset RPC on the wrapped
// connection. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. A failed Invoke yields (nil, err).
func (c *catalogClient) UndeleteDataset(ctx context.Context, in *UndeleteDatasetRequest, opts ...grpc.CallOption) (*UndeleteDatasetResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &UndeleteDatasetResponse{}
        if err := c.cc.Invoke(ctx, Catalog_UndeleteDataset_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// ListDatasets performs the unary ListDatasets RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) ListDatasets(ctx context.Context, in *ListDatasetsRequest, opts ...grpc.CallOption) (*ListDatasetsResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &ListDatasetsResponse{}
        if err := c.cc.Invoke(ctx, Catalog_ListDatasets_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// RegisterTable performs the unary RegisterTable RPC on the wrapped
// connection. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. A failed Invoke yields (nil, err).
func (c *catalogClient) RegisterTable(ctx context.Context, in *RegisterTableRequest, opts ...grpc.CallOption) (*RegisterTableResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &RegisterTableResponse{}
        if err := c.cc.Invoke(ctx, Catalog_RegisterTable_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// DropTable performs the unary DropTable RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) DropTable(ctx context.Context, in *DropTableRequest, opts ...grpc.CallOption) (*DropTableResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &DropTableResponse{}
        if err := c.cc.Invoke(ctx, Catalog_DropTable_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// ListTables performs the unary ListTables RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) ListTables(ctx context.Context, in *ListTablesRequest, opts ...grpc.CallOption) (*ListTablesResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &ListTablesResponse{}
        if err := c.cc.Invoke(ctx, Catalog_ListTables_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// DescribeTable performs the unary DescribeTable RPC on the wrapped
// connection. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. A failed Invoke yields (nil, err).
func (c *catalogClient) DescribeTable(ctx context.Context, in *DescribeTableRequest, opts ...grpc.CallOption) (*DescribeTableResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &DescribeTableResponse{}
        if err := c.cc.Invoke(ctx, Catalog_DescribeTable_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// InsertRows performs the unary InsertRows RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) InsertRows(ctx context.Context, in *InsertRowsRequest, opts ...grpc.CallOption) (*InsertRowsResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &InsertRowsResponse{}
        if err := c.cc.Invoke(ctx, Catalog_InsertRows_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// ListRows performs the unary ListRows RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) ListRows(ctx context.Context, in *ListRowsRequest, opts ...grpc.CallOption) (*ListRowsResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &ListRowsResponse{}
        if err := c.cc.Invoke(ctx, Catalog_ListRows_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// ListRoutines performs the unary ListRoutines RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) ListRoutines(ctx context.Context, in *ListRoutinesRequest, opts ...grpc.CallOption) (*ListRoutinesResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &ListRoutinesResponse{}
        if err := c.cc.Invoke(ctx, Catalog_ListRoutines_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// GetRoutine performs the unary GetRoutine RPC on the wrapped connection.
// grpc.StaticMethod() is placed ahead of the caller-supplied options. A
// failed Invoke yields (nil, err).
func (c *catalogClient) GetRoutine(ctx context.Context, in *GetRoutineRequest, opts ...grpc.CallOption) (*GetRoutineResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &GetRoutineResponse{}
        if err := c.cc.Invoke(ctx, Catalog_GetRoutine_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// UpsertRoutine performs the unary UpsertRoutine RPC on the wrapped
// connection. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. A failed Invoke yields (nil, err).
func (c *catalogClient) UpsertRoutine(ctx context.Context, in *UpsertRoutineRequest, opts ...grpc.CallOption) (*UpsertRoutineResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &UpsertRoutineResponse{}
        if err := c.cc.Invoke(ctx, Catalog_UpsertRoutine_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// DeleteRoutine performs the unary DeleteRoutine RPC on the wrapped
// connection. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. A failed Invoke yields (nil, err).
func (c *catalogClient) DeleteRoutine(ctx context.Context, in *DeleteRoutineRequest, opts ...grpc.CallOption) (*DeleteRoutineResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &DeleteRoutineResponse{}
        if err := c.cc.Invoke(ctx, Catalog_DeleteRoutine_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// UpsertRowAccessPolicy performs the unary UpsertRowAccessPolicy RPC on the
// wrapped connection. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. A failed Invoke yields (nil, err).
func (c *catalogClient) UpsertRowAccessPolicy(ctx context.Context, in *UpsertRowAccessPolicyRequest, opts ...grpc.CallOption) (*UpsertRowAccessPolicyResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &UpsertRowAccessPolicyResponse{}
        if err := c.cc.Invoke(ctx, Catalog_UpsertRowAccessPolicy_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// DeleteRowAccessPolicy performs the unary DeleteRowAccessPolicy RPC on the
// wrapped connection. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. A failed Invoke yields (nil, err).
func (c *catalogClient) DeleteRowAccessPolicy(ctx context.Context, in *DeleteRowAccessPolicyRequest, opts ...grpc.CallOption) (*DeleteRowAccessPolicyResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &DeleteRowAccessPolicyResponse{}
        if err := c.cc.Invoke(ctx, Catalog_DeleteRowAccessPolicy_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// ListRowAccessPolicies performs the unary ListRowAccessPolicies RPC on the
// wrapped connection. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. A failed Invoke yields (nil, err).
func (c *catalogClient) ListRowAccessPolicies(ctx context.Context, in *ListRowAccessPoliciesRequest, opts ...grpc.CallOption) (*ListRowAccessPoliciesResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &ListRowAccessPoliciesResponse{}
        if err := c.cc.Invoke(ctx, Catalog_ListRowAccessPolicies_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// SetColumnGovernance performs the unary SetColumnGovernance RPC on the
// wrapped connection. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. A failed Invoke yields (nil, err).
func (c *catalogClient) SetColumnGovernance(ctx context.Context, in *SetColumnGovernanceRequest, opts ...grpc.CallOption) (*SetColumnGovernanceResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := &SetColumnGovernanceResponse{}
        if err := c.cc.Invoke(ctx, Catalog_SetColumnGovernance_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// CatalogServer is the server API for Catalog service.
// All implementations should embed UnimplementedCatalogServer
// for forward compatibility.
//
// Every method is a unary handler: it receives the call context and one
// decoded request message and returns one response message or an error.
type CatalogServer interface {
        // Dataset registration and removal.
        RegisterDataset(context.Context, *RegisterDatasetRequest) (*RegisterDatasetResponse, error)
        DropDataset(context.Context, *DropDatasetRequest) (*DropDatasetResponse, error)
        UndeleteDataset(context.Context, *UndeleteDatasetRequest) (*UndeleteDatasetResponse, error)
        // Lists every dataset registered under `project_id`. The gateway's
        // `datasets.list` REST handler delegates here; the response is
        // shaped as a single page (no continuation cursor today, the
        // emulator never has enough datasets to need one).
        ListDatasets(context.Context, *ListDatasetsRequest) (*ListDatasetsResponse, error)
        // Table registration and removal.
        RegisterTable(context.Context, *RegisterTableRequest) (*RegisterTableResponse, error)
        DropTable(context.Context, *DropTableRequest) (*DropTableResponse, error)
        // Lists every table registered under `dataset`. The gateway's
        // `tables.list` REST handler delegates here; pagination contract
        // matches ListDatasets.
        ListTables(context.Context, *ListTablesRequest) (*ListTablesResponse, error)
        DescribeTable(context.Context, *DescribeTableRequest) (*DescribeTableResponse, error)
        // Row-level access. `InsertRows` delegates to `Storage::AppendRows`
        // and is the engine side of `tabledata.insertAll`. `ListRows` is
        // the engine side of `tabledata.list` and returns a single
        // (possibly empty) page of rows plus the total row count so the
        // gateway can synthesize BigQuery's pageToken semantics.
        InsertRows(context.Context, *InsertRowsRequest) (*InsertRowsResponse, error)
        ListRows(context.Context, *ListRowsRequest) (*ListRowsResponse, error)
        // Routine metadata persisted in `catalog.duckdb` for REST
        // round-trip and cross-restart SQL UDF / TVF / procedure replay.
        ListRoutines(context.Context, *ListRoutinesRequest) (*ListRoutinesResponse, error)
        GetRoutine(context.Context, *GetRoutineRequest) (*GetRoutineResponse, error)
        UpsertRoutine(context.Context, *UpsertRoutineRequest) (*UpsertRoutineResponse, error)
        DeleteRoutine(context.Context, *DeleteRoutineRequest) (*DeleteRoutineResponse, error)
        // Row-access policies and column-level security metadata.
        UpsertRowAccessPolicy(context.Context, *UpsertRowAccessPolicyRequest) (*UpsertRowAccessPolicyResponse, error)
        DeleteRowAccessPolicy(context.Context, *DeleteRowAccessPolicyRequest) (*DeleteRowAccessPolicyResponse, error)
        ListRowAccessPolicies(context.Context, *ListRowAccessPoliciesRequest) (*ListRowAccessPoliciesResponse, error)
        SetColumnGovernance(context.Context, *SetColumnGovernanceRequest) (*SetColumnGovernanceResponse, error)
}

// UnimplementedCatalogServer should be embedded to have
// forward compatible implementations. Each of its methods returns a
// codes.Unimplemented status error, so an embedding type only needs to
// override the RPCs it actually serves.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedCatalogServer struct{}

// RegisterDataset is the fallback stub: it always returns a
// codes.Unimplemented status error and a nil response.
func (UnimplementedCatalogServer) RegisterDataset(context.Context, *RegisterDatasetRequest) (*RegisterDatasetResponse, error) {
        const msg = "method RegisterDataset not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// DropDataset is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) DropDataset(context.Context, *DropDatasetRequest) (*DropDatasetResponse, error) {
        const msg = "method DropDataset not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// UndeleteDataset is the fallback stub: it always returns a
// codes.Unimplemented status error and a nil response.
func (UnimplementedCatalogServer) UndeleteDataset(context.Context, *UndeleteDatasetRequest) (*UndeleteDatasetResponse, error) {
        const msg = "method UndeleteDataset not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// ListDatasets is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) ListDatasets(context.Context, *ListDatasetsRequest) (*ListDatasetsResponse, error) {
        const msg = "method ListDatasets not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// RegisterTable is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) RegisterTable(context.Context, *RegisterTableRequest) (*RegisterTableResponse, error) {
        const msg = "method RegisterTable not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// DropTable is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) DropTable(context.Context, *DropTableRequest) (*DropTableResponse, error) {
        const msg = "method DropTable not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// ListTables is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) ListTables(context.Context, *ListTablesRequest) (*ListTablesResponse, error) {
        const msg = "method ListTables not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// DescribeTable is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) DescribeTable(context.Context, *DescribeTableRequest) (*DescribeTableResponse, error) {
        const msg = "method DescribeTable not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// InsertRows is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) InsertRows(context.Context, *InsertRowsRequest) (*InsertRowsResponse, error) {
        const msg = "method InsertRows not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// ListRows is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) ListRows(context.Context, *ListRowsRequest) (*ListRowsResponse, error) {
        const msg = "method ListRows not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// ListRoutines is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) ListRoutines(context.Context, *ListRoutinesRequest) (*ListRoutinesResponse, error) {
        const msg = "method ListRoutines not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// GetRoutine is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) GetRoutine(context.Context, *GetRoutineRequest) (*GetRoutineResponse, error) {
        const msg = "method GetRoutine not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// UpsertRoutine is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) UpsertRoutine(context.Context, *UpsertRoutineRequest) (*UpsertRoutineResponse, error) {
        const msg = "method UpsertRoutine not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// DeleteRoutine is the fallback stub: it always returns a codes.Unimplemented
// status error and a nil response.
func (UnimplementedCatalogServer) DeleteRoutine(context.Context, *DeleteRoutineRequest) (*DeleteRoutineResponse, error) {
        const msg = "method DeleteRoutine not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// UpsertRowAccessPolicy is the fallback stub: it always returns a
// codes.Unimplemented status error and a nil response.
func (UnimplementedCatalogServer) UpsertRowAccessPolicy(context.Context, *UpsertRowAccessPolicyRequest) (*UpsertRowAccessPolicyResponse, error) {
        const msg = "method UpsertRowAccessPolicy not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// DeleteRowAccessPolicy is the fallback stub: it always returns a
// codes.Unimplemented status error and a nil response.
func (UnimplementedCatalogServer) DeleteRowAccessPolicy(context.Context, *DeleteRowAccessPolicyRequest) (*DeleteRowAccessPolicyResponse, error) {
        const msg = "method DeleteRowAccessPolicy not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// ListRowAccessPolicies is the fallback stub: it always returns a
// codes.Unimplemented status error and a nil response.
func (UnimplementedCatalogServer) ListRowAccessPolicies(context.Context, *ListRowAccessPoliciesRequest) (*ListRowAccessPoliciesResponse, error) {
        const msg = "method ListRowAccessPolicies not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// SetColumnGovernance is the fallback stub: it always returns a
// codes.Unimplemented status error and a nil response.
func (UnimplementedCatalogServer) SetColumnGovernance(context.Context, *SetColumnGovernanceRequest) (*SetColumnGovernanceResponse, error) {
        const msg = "method SetColumnGovernance not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}
// testEmbeddedByValue is a no-op marker method. RegisterCatalogServer calls it
// so that a nil pointer embedding of UnimplementedCatalogServer panics at
// registration time.
func (UnimplementedCatalogServer) testEmbeddedByValue() {}

// UnsafeCatalogServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to CatalogServer will
// result in compilation errors.
//
// Note that CatalogServer as declared in this file does not itself list
// mustEmbedUnimplementedCatalogServer among its methods.
type UnsafeCatalogServer interface {
        mustEmbedUnimplementedCatalogServer()
}

// RegisterCatalogServer registers srv with s under Catalog_ServiceDesc.
func RegisterCatalogServer(s grpc.ServiceRegistrar, srv CatalogServer) {
        // Implementations that embed UnimplementedCatalogServer expose
        // testEmbeddedByValue. Calling it here panics right away when the
        // embedding is a nil pointer, instead of later at runtime when an
        // unimplemented method is first invoked.
        type embedProbe interface{ testEmbeddedByValue() }
        probe, embeds := srv.(embedProbe)
        if embeds {
                probe.testEmbeddedByValue()
        }
        s.RegisterService(&Catalog_ServiceDesc, srv)
}

// _Catalog_RegisterDataset_Handler decodes a RegisterDatasetRequest with dec
// and calls CatalogServer.RegisterDataset on srv. When interceptor is non-nil
// the call is routed through it, with Catalog_RegisterDataset_FullMethodName
// reported as the method. A decode error is returned unchanged.
func _Catalog_RegisterDataset_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(RegisterDatasetRequest)
        if decErr := dec(req); decErr != nil {
                return nil, decErr
        }
        if interceptor != nil {
                // The interceptor decides whether and when to run the real method.
                invoke := func(hctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).RegisterDataset(hctx, msg.(*RegisterDatasetRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_RegisterDataset_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).RegisterDataset(ctx, req)
}

// _Catalog_DropDataset_Handler decodes a DropDatasetRequest with dec and
// calls CatalogServer.DropDataset on srv. When interceptor is non-nil the
// call is routed through it, with Catalog_DropDataset_FullMethodName reported
// as the method. A decode error is returned unchanged.
func _Catalog_DropDataset_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(DropDatasetRequest)
        if decErr := dec(req); decErr != nil {
                return nil, decErr
        }
        if interceptor != nil {
                // The interceptor decides whether and when to run the real method.
                invoke := func(hctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).DropDataset(hctx, msg.(*DropDatasetRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_DropDataset_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).DropDataset(ctx, req)
}

// _Catalog_UndeleteDataset_Handler decodes an UndeleteDatasetRequest with dec
// and calls CatalogServer.UndeleteDataset on srv. When interceptor is non-nil
// the call is routed through it, with Catalog_UndeleteDataset_FullMethodName
// reported as the method. A decode error is returned unchanged.
func _Catalog_UndeleteDataset_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(UndeleteDatasetRequest)
        if decErr := dec(req); decErr != nil {
                return nil, decErr
        }
        if interceptor != nil {
                // The interceptor decides whether and when to run the real method.
                invoke := func(hctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).UndeleteDataset(hctx, msg.(*UndeleteDatasetRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_UndeleteDataset_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).UndeleteDataset(ctx, req)
}

// _Catalog_ListDatasets_Handler is the unary server handler for the Catalog
// ListDatasets RPC. It fills a ListDatasetsRequest via dec and then calls
// CatalogServer.ListDatasets, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _Catalog_ListDatasets_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(ListDatasetsRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).ListDatasets(ctx, msg.(*ListDatasetsRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_ListDatasets_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).ListDatasets(ctx, req)
}

// _Catalog_RegisterTable_Handler is the unary server handler for the Catalog
// RegisterTable RPC. It fills a RegisterTableRequest via dec and then calls
// CatalogServer.RegisterTable, either directly or, when interceptor is
// non-nil, through the interceptor together with the method's UnaryServerInfo.
func _Catalog_RegisterTable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(RegisterTableRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).RegisterTable(ctx, msg.(*RegisterTableRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_RegisterTable_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).RegisterTable(ctx, req)
}

// _Catalog_DropTable_Handler is the unary server handler for the Catalog
// DropTable RPC. It fills a DropTableRequest via dec and then calls
// CatalogServer.DropTable, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _Catalog_DropTable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(DropTableRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).DropTable(ctx, msg.(*DropTableRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_DropTable_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).DropTable(ctx, req)
}

// _Catalog_ListTables_Handler is the unary server handler for the Catalog
// ListTables RPC. It fills a ListTablesRequest via dec and then calls
// CatalogServer.ListTables, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _Catalog_ListTables_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(ListTablesRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).ListTables(ctx, msg.(*ListTablesRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_ListTables_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).ListTables(ctx, req)
}

// _Catalog_DescribeTable_Handler is the unary server handler for the Catalog
// DescribeTable RPC. It fills a DescribeTableRequest via dec and then calls
// CatalogServer.DescribeTable, either directly or, when interceptor is
// non-nil, through the interceptor together with the method's UnaryServerInfo.
func _Catalog_DescribeTable_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(DescribeTableRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).DescribeTable(ctx, msg.(*DescribeTableRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_DescribeTable_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).DescribeTable(ctx, req)
}

// _Catalog_InsertRows_Handler is the unary server handler for the Catalog
// InsertRows RPC. It fills an InsertRowsRequest via dec and then calls
// CatalogServer.InsertRows, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _Catalog_InsertRows_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(InsertRowsRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).InsertRows(ctx, msg.(*InsertRowsRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_InsertRows_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).InsertRows(ctx, req)
}

// _Catalog_ListRows_Handler is the unary server handler for the Catalog
// ListRows RPC. It fills a ListRowsRequest via dec and then calls
// CatalogServer.ListRows, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _Catalog_ListRows_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(ListRowsRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).ListRows(ctx, msg.(*ListRowsRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_ListRows_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).ListRows(ctx, req)
}

// _Catalog_ListRoutines_Handler is the unary server handler for the Catalog
// ListRoutines RPC. It fills a ListRoutinesRequest via dec and then calls
// CatalogServer.ListRoutines, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _Catalog_ListRoutines_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(ListRoutinesRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).ListRoutines(ctx, msg.(*ListRoutinesRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_ListRoutines_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).ListRoutines(ctx, req)
}

// _Catalog_GetRoutine_Handler is the unary server handler for the Catalog
// GetRoutine RPC. It fills a GetRoutineRequest via dec and then calls
// CatalogServer.GetRoutine, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _Catalog_GetRoutine_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(GetRoutineRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).GetRoutine(ctx, msg.(*GetRoutineRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_GetRoutine_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).GetRoutine(ctx, req)
}

// _Catalog_UpsertRoutine_Handler is the unary server handler for the Catalog
// UpsertRoutine RPC. It fills an UpsertRoutineRequest via dec and then calls
// CatalogServer.UpsertRoutine, either directly or, when interceptor is
// non-nil, through the interceptor together with the method's UnaryServerInfo.
func _Catalog_UpsertRoutine_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(UpsertRoutineRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).UpsertRoutine(ctx, msg.(*UpsertRoutineRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_UpsertRoutine_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).UpsertRoutine(ctx, req)
}

// _Catalog_DeleteRoutine_Handler is the unary server handler for the Catalog
// DeleteRoutine RPC. It fills a DeleteRoutineRequest via dec and then calls
// CatalogServer.DeleteRoutine, either directly or, when interceptor is
// non-nil, through the interceptor together with the method's UnaryServerInfo.
func _Catalog_DeleteRoutine_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(DeleteRoutineRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).DeleteRoutine(ctx, msg.(*DeleteRoutineRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_DeleteRoutine_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).DeleteRoutine(ctx, req)
}

// _Catalog_UpsertRowAccessPolicy_Handler is the unary server handler for the
// Catalog UpsertRowAccessPolicy RPC. It fills an UpsertRowAccessPolicyRequest
// via dec and then calls CatalogServer.UpsertRowAccessPolicy, either directly
// or, when interceptor is non-nil, through the interceptor together with the
// method's UnaryServerInfo.
func _Catalog_UpsertRowAccessPolicy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(UpsertRowAccessPolicyRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).UpsertRowAccessPolicy(ctx, msg.(*UpsertRowAccessPolicyRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_UpsertRowAccessPolicy_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).UpsertRowAccessPolicy(ctx, req)
}

// _Catalog_DeleteRowAccessPolicy_Handler is the unary server handler for the
// Catalog DeleteRowAccessPolicy RPC. It fills a DeleteRowAccessPolicyRequest
// via dec and then calls CatalogServer.DeleteRowAccessPolicy, either directly
// or, when interceptor is non-nil, through the interceptor together with the
// method's UnaryServerInfo.
func _Catalog_DeleteRowAccessPolicy_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(DeleteRowAccessPolicyRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).DeleteRowAccessPolicy(ctx, msg.(*DeleteRowAccessPolicyRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_DeleteRowAccessPolicy_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).DeleteRowAccessPolicy(ctx, req)
}

// _Catalog_ListRowAccessPolicies_Handler is the unary server handler for the
// Catalog ListRowAccessPolicies RPC. It fills a ListRowAccessPoliciesRequest
// via dec and then calls CatalogServer.ListRowAccessPolicies, either directly
// or, when interceptor is non-nil, through the interceptor together with the
// method's UnaryServerInfo.
func _Catalog_ListRowAccessPolicies_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(ListRowAccessPoliciesRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).ListRowAccessPolicies(ctx, msg.(*ListRowAccessPoliciesRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_ListRowAccessPolicies_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).ListRowAccessPolicies(ctx, req)
}

// _Catalog_SetColumnGovernance_Handler is the unary server handler for the
// Catalog SetColumnGovernance RPC. It fills a SetColumnGovernanceRequest via
// dec and then calls CatalogServer.SetColumnGovernance, either directly or,
// when interceptor is non-nil, through the interceptor together with the
// method's UnaryServerInfo.
func _Catalog_SetColumnGovernance_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(SetColumnGovernanceRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(CatalogServer).SetColumnGovernance(ctx, msg.(*SetColumnGovernanceRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Catalog_SetColumnGovernance_FullMethodName,
                }, invoke)
        }
        return srv.(CatalogServer).SetColumnGovernance(ctx, req)
}

// Catalog_ServiceDesc is the grpc.ServiceDesc for Catalog service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
//
// Every Catalog RPC listed here is unary: each entry in Methods pairs the
// RPC's short name with its generated _Catalog_*_Handler function, and
// Streams is empty.
var Catalog_ServiceDesc = grpc.ServiceDesc{
        ServiceName: "bigquery_emulator.v1.Catalog",
        HandlerType: (*CatalogServer)(nil),
        Methods: []grpc.MethodDesc{
                // Dataset operations.
                {
                        MethodName: "RegisterDataset",
                        Handler:    _Catalog_RegisterDataset_Handler,
                },
                {
                        MethodName: "DropDataset",
                        Handler:    _Catalog_DropDataset_Handler,
                },
                {
                        MethodName: "UndeleteDataset",
                        Handler:    _Catalog_UndeleteDataset_Handler,
                },
                {
                        MethodName: "ListDatasets",
                        Handler:    _Catalog_ListDatasets_Handler,
                },
                // Table operations.
                {
                        MethodName: "RegisterTable",
                        Handler:    _Catalog_RegisterTable_Handler,
                },
                {
                        MethodName: "DropTable",
                        Handler:    _Catalog_DropTable_Handler,
                },
                {
                        MethodName: "ListTables",
                        Handler:    _Catalog_ListTables_Handler,
                },
                {
                        MethodName: "DescribeTable",
                        Handler:    _Catalog_DescribeTable_Handler,
                },
                // Row operations.
                {
                        MethodName: "InsertRows",
                        Handler:    _Catalog_InsertRows_Handler,
                },
                {
                        MethodName: "ListRows",
                        Handler:    _Catalog_ListRows_Handler,
                },
                // Routine operations.
                {
                        MethodName: "ListRoutines",
                        Handler:    _Catalog_ListRoutines_Handler,
                },
                {
                        MethodName: "GetRoutine",
                        Handler:    _Catalog_GetRoutine_Handler,
                },
                {
                        MethodName: "UpsertRoutine",
                        Handler:    _Catalog_UpsertRoutine_Handler,
                },
                {
                        MethodName: "DeleteRoutine",
                        Handler:    _Catalog_DeleteRoutine_Handler,
                },
                // Row access policy operations.
                {
                        MethodName: "UpsertRowAccessPolicy",
                        Handler:    _Catalog_UpsertRowAccessPolicy_Handler,
                },
                {
                        MethodName: "DeleteRowAccessPolicy",
                        Handler:    _Catalog_DeleteRowAccessPolicy_Handler,
                },
                {
                        MethodName: "ListRowAccessPolicies",
                        Handler:    _Catalog_ListRowAccessPolicies_Handler,
                },
                // Column governance.
                {
                        MethodName: "SetColumnGovernance",
                        Handler:    _Catalog_SetColumnGovernance_Handler,
                },
        },
        // No streaming RPCs are registered for this service.
        Streams:  []grpc.StreamDesc{},
        Metadata: "emulator.proto",
}

// Full gRPC method names for the Query service, written as
// "/<service name>/<method>". The client passes them to Invoke/NewStream and
// the unary server handler reports them in grpc.UnaryServerInfo.FullMethod.
const (
        Query_DryRun_FullMethodName       = "/bigquery_emulator.v1.Query/DryRun"
        Query_ExecuteQuery_FullMethodName = "/bigquery_emulator.v1.Query/ExecuteQuery"
)

// QueryClient is the client API for Query service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type QueryClient interface {
        // DryRun is a unary call: it sends one QueryRequest and returns one
        // DryRunResponse.
        DryRun(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*DryRunResponse, error)
        // ExecuteQuery is a server-streaming call: it sends one QueryRequest and
        // returns a stream from which QueryResultRow messages are received.
        ExecuteQuery(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[QueryResultRow], error)
}

// queryClient is the QueryClient implementation returned by NewQueryClient; it
// issues every RPC over the wrapped connection.
type queryClient struct {
        cc grpc.ClientConnInterface
}

// NewQueryClient returns a QueryClient that performs its RPCs over cc.
func NewQueryClient(cc grpc.ClientConnInterface) QueryClient {
        client := &queryClient{cc: cc}
        return client
}

// DryRun invokes the unary Query/DryRun RPC with in and returns the decoded
// DryRunResponse. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. On failure the response is nil and the Invoke error is returned
// unchanged.
func (c *queryClient) DryRun(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*DryRunResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := new(DryRunResponse)
        if err := c.cc.Invoke(ctx, Query_DryRun_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// ExecuteQuery opens the server-streaming Query/ExecuteQuery RPC, sends the
// single request message in, closes the send side, and returns a typed stream
// from which QueryResultRow messages are received. Any error from opening the
// stream, sending the request, or closing the send side is returned with a nil
// stream.
func (c *queryClient) ExecuteQuery(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[QueryResultRow], error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        raw, err := c.cc.NewStream(ctx, &Query_ServiceDesc.Streams[0], Query_ExecuteQuery_FullMethodName, callOpts...)
        if err != nil {
                return nil, err
        }
        // Only one request is ever sent, so half-close right after sending it.
        if err = raw.SendMsg(in); err != nil {
                return nil, err
        }
        if err = raw.CloseSend(); err != nil {
                return nil, err
        }
        return &grpc.GenericClientStream[QueryRequest, QueryResultRow]{ClientStream: raw}, nil
}

// Query_ExecuteQueryClient is the client-side stream type of the ExecuteQuery
// RPC. This type alias is provided for backwards compatibility with existing
// code that references the prior non-generic stream type by name.
type Query_ExecuteQueryClient = grpc.ServerStreamingClient[QueryResultRow]

// QueryServer is the server API for Query service.
// All implementations should embed UnimplementedQueryServer
// for forward compatibility.
type QueryServer interface {
        // DryRun handles the unary DryRun RPC: one QueryRequest in, one
        // DryRunResponse (or an error) out.
        DryRun(context.Context, *QueryRequest) (*DryRunResponse, error)
        // ExecuteQuery handles the server-streaming ExecuteQuery RPC: it receives
        // the single QueryRequest and a stream typed for QueryResultRow messages.
        ExecuteQuery(*QueryRequest, grpc.ServerStreamingServer[QueryResultRow]) error
}

// UnimplementedQueryServer supplies stub versions of the QueryServer methods,
// each of which fails with codes.Unimplemented. Embed it in an implementation
// to stay forward compatible.
//
// NOTE: embed it by value rather than by pointer, so that calling a stub can
// never dereference a nil pointer.
type UnimplementedQueryServer struct{}

// DryRun is the stub for the DryRun RPC; it always returns a nil response and
// a codes.Unimplemented status error.
func (UnimplementedQueryServer) DryRun(context.Context, *QueryRequest) (*DryRunResponse, error) {
        err := status.Error(codes.Unimplemented, "method DryRun not implemented")
        return nil, err
}

// ExecuteQuery is the stub for the ExecuteQuery RPC; it always returns a
// codes.Unimplemented status error.
func (UnimplementedQueryServer) ExecuteQuery(*QueryRequest, grpc.ServerStreamingServer[QueryResultRow]) error {
        err := status.Error(codes.Unimplemented, "method ExecuteQuery not implemented")
        return err
}

// testEmbeddedByValue is a no-op that RegisterQueryServer calls to probe how
// this type was embedded.
func (UnimplementedQueryServer) testEmbeddedByValue() {}

// UnsafeQueryServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to QueryServer will
// result in compilation errors.
type UnsafeQueryServer interface {
        // mustEmbedUnimplementedQueryServer is an unexported marker method.
        mustEmbedUnimplementedQueryServer()
}

// RegisterQueryServer registers srv with s as the implementation of the Query
// service described by Query_ServiceDesc.
func RegisterQueryServer(s grpc.ServiceRegistrar, srv QueryServer) {
        // embedProbe matches servers that expose testEmbeddedByValue, i.e. those
        // that embed UnimplementedQueryServer.
        type embedProbe interface{ testEmbeddedByValue() }

        // A panic in the probe call below means UnimplementedQueryServer was
        // embedded by pointer and is nil. Calling any unimplemented method
        // would then panic too, so the check runs here at initialization
        // instead of surfacing later at runtime due to I/O.
        if probe, hasProbe := srv.(embedProbe); hasProbe {
                probe.testEmbeddedByValue()
        }
        s.RegisterService(&Query_ServiceDesc, srv)
}

// _Query_DryRun_Handler is the unary server handler for the Query DryRun RPC.
// It fills a QueryRequest via dec and then calls QueryServer.DryRun, either
// directly or, when interceptor is non-nil, through the interceptor together
// with the method's UnaryServerInfo.
func _Query_DryRun_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(QueryRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(QueryServer).DryRun(ctx, msg.(*QueryRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: Query_DryRun_FullMethodName,
                }, invoke)
        }
        return srv.(QueryServer).DryRun(ctx, req)
}

// _Query_ExecuteQuery_Handler is the stream handler for the Query
// ExecuteQuery RPC. It receives the single QueryRequest from stream and passes
// it, together with a typed wrapper around stream, to
// QueryServer.ExecuteQuery. A receive error is returned as-is.
func _Query_ExecuteQuery_Handler(srv interface{}, stream grpc.ServerStream) error {
        req := new(QueryRequest)
        if recvErr := stream.RecvMsg(req); recvErr != nil {
                return recvErr
        }
        rows := &grpc.GenericServerStream[QueryRequest, QueryResultRow]{ServerStream: stream}
        return srv.(QueryServer).ExecuteQuery(req, rows)
}

// Query_ExecuteQueryServer is the server-side stream type of the ExecuteQuery
// RPC. This type alias is provided for backwards compatibility with existing
// code that references the prior non-generic stream type by name.
type Query_ExecuteQueryServer = grpc.ServerStreamingServer[QueryResultRow]

// Query_ServiceDesc is the grpc.ServiceDesc for Query service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
var Query_ServiceDesc = grpc.ServiceDesc{
        ServiceName: "bigquery_emulator.v1.Query",
        HandlerType: (*QueryServer)(nil),
        // Unary RPCs.
        Methods: []grpc.MethodDesc{
                {
                        MethodName: "DryRun",
                        Handler:    _Query_DryRun_Handler,
                },
        },
        // Streaming RPCs. queryClient.ExecuteQuery refers to the ExecuteQuery
        // entry by position (Streams[0]), so it must remain first.
        Streams: []grpc.StreamDesc{
                {
                        StreamName:    "ExecuteQuery",
                        Handler:       _Query_ExecuteQuery_Handler,
                        ServerStreams: true,
                },
        },
        Metadata: "emulator.proto",
}

// Full gRPC method names for the SqlTools service, written as
// "/<service name>/<method>". The client passes them to Invoke and the server
// handlers report them in grpc.UnaryServerInfo.FullMethod.
const (
        SqlTools_Format_FullMethodName   = "/bigquery_emulator.v1.SqlTools/Format"
        SqlTools_Parse_FullMethodName    = "/bigquery_emulator.v1.SqlTools/Parse"
        SqlTools_Tokenize_FullMethodName = "/bigquery_emulator.v1.SqlTools/Tokenize"
        SqlTools_Complete_FullMethodName = "/bigquery_emulator.v1.SqlTools/Complete"
        SqlTools_Analyze_FullMethodName  = "/bigquery_emulator.v1.SqlTools/Analyze"
)

// SqlToolsClient is the client API for SqlTools service.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
//
// All five methods are unary calls: one request message in, one response
// message (or an error) out.
type SqlToolsClient interface {
        // Format invokes the SqlTools/Format RPC.
        Format(ctx context.Context, in *FormatSqlRequest, opts ...grpc.CallOption) (*FormatSqlResponse, error)
        // Parse invokes the SqlTools/Parse RPC.
        Parse(ctx context.Context, in *ParseSqlRequest, opts ...grpc.CallOption) (*ParseSqlResponse, error)
        // Tokenize invokes the SqlTools/Tokenize RPC.
        Tokenize(ctx context.Context, in *TokenizeSqlRequest, opts ...grpc.CallOption) (*TokenizeSqlResponse, error)
        // Complete invokes the SqlTools/Complete RPC.
        Complete(ctx context.Context, in *CompleteSqlRequest, opts ...grpc.CallOption) (*CompleteSqlResponse, error)
        // Analyze invokes the SqlTools/Analyze RPC.
        Analyze(ctx context.Context, in *AnalyzeSqlRequest, opts ...grpc.CallOption) (*AnalyzeSqlResponse, error)
}

// sqlToolsClient is the SqlToolsClient implementation returned by
// NewSqlToolsClient; it issues every RPC over the wrapped connection.
type sqlToolsClient struct {
        cc grpc.ClientConnInterface
}

// NewSqlToolsClient returns a SqlToolsClient that performs its RPCs over cc.
func NewSqlToolsClient(cc grpc.ClientConnInterface) SqlToolsClient {
        client := &sqlToolsClient{cc: cc}
        return client
}

// Format invokes the unary SqlTools/Format RPC with in and returns the decoded
// FormatSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. On failure the response is nil and the Invoke error
// is returned unchanged.
func (c *sqlToolsClient) Format(ctx context.Context, in *FormatSqlRequest, opts ...grpc.CallOption) (*FormatSqlResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := new(FormatSqlResponse)
        if err := c.cc.Invoke(ctx, SqlTools_Format_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// Parse invokes the unary SqlTools/Parse RPC with in and returns the decoded
// ParseSqlResponse. grpc.StaticMethod() is placed ahead of the caller-supplied
// options. On failure the response is nil and the Invoke error is returned
// unchanged.
func (c *sqlToolsClient) Parse(ctx context.Context, in *ParseSqlRequest, opts ...grpc.CallOption) (*ParseSqlResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := new(ParseSqlResponse)
        if err := c.cc.Invoke(ctx, SqlTools_Parse_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// Tokenize invokes the unary SqlTools/Tokenize RPC with in and returns the
// decoded TokenizeSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. On failure the response is nil and the Invoke error
// is returned unchanged.
func (c *sqlToolsClient) Tokenize(ctx context.Context, in *TokenizeSqlRequest, opts ...grpc.CallOption) (*TokenizeSqlResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := new(TokenizeSqlResponse)
        if err := c.cc.Invoke(ctx, SqlTools_Tokenize_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// Complete invokes the unary SqlTools/Complete RPC with in and returns the
// decoded CompleteSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. On failure the response is nil and the Invoke error
// is returned unchanged.
func (c *sqlToolsClient) Complete(ctx context.Context, in *CompleteSqlRequest, opts ...grpc.CallOption) (*CompleteSqlResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := new(CompleteSqlResponse)
        if err := c.cc.Invoke(ctx, SqlTools_Complete_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// Analyze invokes the unary SqlTools/Analyze RPC with in and returns the
// decoded AnalyzeSqlResponse. grpc.StaticMethod() is placed ahead of the
// caller-supplied options. On failure the response is nil and the Invoke error
// is returned unchanged.
func (c *sqlToolsClient) Analyze(ctx context.Context, in *AnalyzeSqlRequest, opts ...grpc.CallOption) (*AnalyzeSqlResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        resp := new(AnalyzeSqlResponse)
        if err := c.cc.Invoke(ctx, SqlTools_Analyze_FullMethodName, in, resp, callOpts...); err != nil {
                return nil, err
        }
        return resp, nil
}

// SqlToolsServer is the server API for SqlTools service.
// All implementations should embed UnimplementedSqlToolsServer
// for forward compatibility.
//
// All five methods are unary: each receives one request message and returns
// one response message or an error.
type SqlToolsServer interface {
        // Format handles the SqlTools/Format RPC.
        Format(context.Context, *FormatSqlRequest) (*FormatSqlResponse, error)
        // Parse handles the SqlTools/Parse RPC.
        Parse(context.Context, *ParseSqlRequest) (*ParseSqlResponse, error)
        // Tokenize handles the SqlTools/Tokenize RPC.
        Tokenize(context.Context, *TokenizeSqlRequest) (*TokenizeSqlResponse, error)
        // Complete handles the SqlTools/Complete RPC.
        Complete(context.Context, *CompleteSqlRequest) (*CompleteSqlResponse, error)
        // Analyze handles the SqlTools/Analyze RPC.
        Analyze(context.Context, *AnalyzeSqlRequest) (*AnalyzeSqlResponse, error)
}

// UnimplementedSqlToolsServer supplies stub versions of the SqlToolsServer
// methods, each of which fails with codes.Unimplemented. Embed it in an
// implementation to stay forward compatible.
//
// NOTE: embed it by value rather than by pointer, so that calling a stub can
// never dereference a nil pointer.
type UnimplementedSqlToolsServer struct{}

// Format is the stub for the Format RPC; it always returns a nil response and
// a codes.Unimplemented status error.
func (UnimplementedSqlToolsServer) Format(context.Context, *FormatSqlRequest) (*FormatSqlResponse, error) {
        err := status.Error(codes.Unimplemented, "method Format not implemented")
        return nil, err
}

// Parse is the stub for the Parse RPC; it always returns a nil response and a
// codes.Unimplemented status error.
func (UnimplementedSqlToolsServer) Parse(context.Context, *ParseSqlRequest) (*ParseSqlResponse, error) {
        err := status.Error(codes.Unimplemented, "method Parse not implemented")
        return nil, err
}

// Tokenize is the stub for the Tokenize RPC; it always returns a nil response
// and a codes.Unimplemented status error.
func (UnimplementedSqlToolsServer) Tokenize(context.Context, *TokenizeSqlRequest) (*TokenizeSqlResponse, error) {
        err := status.Error(codes.Unimplemented, "method Tokenize not implemented")
        return nil, err
}

// Complete is the stub for the Complete RPC; it always returns a nil response
// and a codes.Unimplemented status error.
func (UnimplementedSqlToolsServer) Complete(context.Context, *CompleteSqlRequest) (*CompleteSqlResponse, error) {
        err := status.Error(codes.Unimplemented, "method Complete not implemented")
        return nil, err
}

// Analyze is the stub for the Analyze RPC; it always returns a nil response
// and a codes.Unimplemented status error.
func (UnimplementedSqlToolsServer) Analyze(context.Context, *AnalyzeSqlRequest) (*AnalyzeSqlResponse, error) {
        err := status.Error(codes.Unimplemented, "method Analyze not implemented")
        return nil, err
}

// testEmbeddedByValue is a no-op that RegisterSqlToolsServer calls to probe
// how this type was embedded.
func (UnimplementedSqlToolsServer) testEmbeddedByValue() {}

// UnsafeSqlToolsServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to SqlToolsServer will
// result in compilation errors.
type UnsafeSqlToolsServer interface {
        // mustEmbedUnimplementedSqlToolsServer is an unexported marker method.
        mustEmbedUnimplementedSqlToolsServer()
}

// RegisterSqlToolsServer registers srv with s as the implementation of the
// SqlTools service described by SqlTools_ServiceDesc.
func RegisterSqlToolsServer(s grpc.ServiceRegistrar, srv SqlToolsServer) {
        // embedProbe matches servers that expose testEmbeddedByValue, i.e. those
        // that embed UnimplementedSqlToolsServer.
        type embedProbe interface{ testEmbeddedByValue() }

        // A panic in the probe call below means UnimplementedSqlToolsServer was
        // embedded by pointer and is nil. Calling any unimplemented method
        // would then panic too, so the check runs here at initialization
        // instead of surfacing later at runtime due to I/O.
        if probe, hasProbe := srv.(embedProbe); hasProbe {
                probe.testEmbeddedByValue()
        }
        s.RegisterService(&SqlTools_ServiceDesc, srv)
}

// _SqlTools_Format_Handler is the unary server handler for the SqlTools Format
// RPC. It fills a FormatSqlRequest via dec and then calls
// SqlToolsServer.Format, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _SqlTools_Format_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(FormatSqlRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(SqlToolsServer).Format(ctx, msg.(*FormatSqlRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: SqlTools_Format_FullMethodName,
                }, invoke)
        }
        return srv.(SqlToolsServer).Format(ctx, req)
}

// _SqlTools_Parse_Handler is the unary server handler for the SqlTools Parse
// RPC. It fills a ParseSqlRequest via dec and then calls SqlToolsServer.Parse,
// either directly or, when interceptor is non-nil, through the interceptor
// together with the method's UnaryServerInfo.
func _SqlTools_Parse_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(ParseSqlRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(SqlToolsServer).Parse(ctx, msg.(*ParseSqlRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: SqlTools_Parse_FullMethodName,
                }, invoke)
        }
        return srv.(SqlToolsServer).Parse(ctx, req)
}

// _SqlTools_Tokenize_Handler is the unary server handler for the SqlTools
// Tokenize RPC. It fills a TokenizeSqlRequest via dec and then calls
// SqlToolsServer.Tokenize, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _SqlTools_Tokenize_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(TokenizeSqlRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(SqlToolsServer).Tokenize(ctx, msg.(*TokenizeSqlRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: SqlTools_Tokenize_FullMethodName,
                }, invoke)
        }
        return srv.(SqlToolsServer).Tokenize(ctx, req)
}

// _SqlTools_Complete_Handler is the unary server handler for the SqlTools
// Complete RPC. It fills a CompleteSqlRequest via dec and then calls
// SqlToolsServer.Complete, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _SqlTools_Complete_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(CompleteSqlRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(SqlToolsServer).Complete(ctx, msg.(*CompleteSqlRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: SqlTools_Complete_FullMethodName,
                }, invoke)
        }
        return srv.(SqlToolsServer).Complete(ctx, req)
}

// _SqlTools_Analyze_Handler is the unary server handler for the SqlTools
// Analyze RPC. It fills an AnalyzeSqlRequest via dec and then calls
// SqlToolsServer.Analyze, either directly or, when interceptor is non-nil,
// through the interceptor together with the method's UnaryServerInfo.
func _SqlTools_Analyze_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        req := new(AnalyzeSqlRequest)
        if decodeErr := dec(req); decodeErr != nil {
                return nil, decodeErr
        }
        if interceptor != nil {
                // The interceptor hands back an untyped message, so re-assert it.
                invoke := func(ctx context.Context, msg interface{}) (interface{}, error) {
                        return srv.(SqlToolsServer).Analyze(ctx, msg.(*AnalyzeSqlRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: SqlTools_Analyze_FullMethodName,
                }, invoke)
        }
        return srv.(SqlToolsServer).Analyze(ctx, req)
}

// SqlTools_ServiceDesc is the grpc.ServiceDesc for SqlTools service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
//
// Every SqlTools RPC listed here is unary: each entry in Methods pairs the
// RPC's short name with its generated _SqlTools_*_Handler function, and
// Streams is empty.
var SqlTools_ServiceDesc = grpc.ServiceDesc{
        ServiceName: "bigquery_emulator.v1.SqlTools",
        HandlerType: (*SqlToolsServer)(nil),
        Methods: []grpc.MethodDesc{
                {
                        MethodName: "Format",
                        Handler:    _SqlTools_Format_Handler,
                },
                {
                        MethodName: "Parse",
                        Handler:    _SqlTools_Parse_Handler,
                },
                {
                        MethodName: "Tokenize",
                        Handler:    _SqlTools_Tokenize_Handler,
                },
                {
                        MethodName: "Complete",
                        Handler:    _SqlTools_Complete_Handler,
                },
                {
                        MethodName: "Analyze",
                        Handler:    _SqlTools_Analyze_Handler,
                },
        },
        // No streaming RPCs are registered for this service.
        Streams:  []grpc.StreamDesc{},
        Metadata: "emulator.proto",
}

// storage_read.proto is the internal contract for the BigQuery Storage
// Read API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.list` reads against large
// tables into Storage Read RPCs against this service so the C++ engine
// can stream rows back without the entire result set living in the
// gateway's heap.
//
// The engine implements `CreateReadSession` and the streaming
// `ReadRows` reply, including per-column projection
// (`selected_fields`) and `row_restriction` pushdown, and the gateway
// is wired to it (with e2e coverage under `gateway/e2e/`). The shape
// here is the **simplified** Storage Read v1 contract: no Arrow/Avro
// projections (rows ride on the same `DataRow` cells that
// `Catalog.ListRows` already returns) and no session liveness
// extension. Those are documented as "future" so the conformance
// harness can pin per-feature gaps. A `SplitReadStream` RPC is
// declared on the service in addition to the two RPCs named above.
//
// Code generation:
//   - Go:  `task proto:gen` writes
//          `gateway/enginepb/storage_read.{pb,grpc.pb}.go`. Same
//          plugin pipeline as `emulator.proto`.
//   - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
//          `proto/BUILD.bazel` emit
//          `storage_read.{pb,grpc.pb}.{h,cc}` into bazel-bin.

// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
//         protoc-gen-go v1.36.11
//         protoc        (unknown)
// source: storage_read.proto

package enginepb

import (
        protoreflect "google.golang.org/protobuf/reflect/protoreflect"
        protoimpl "google.golang.org/protobuf/runtime/protoimpl"
        reflect "reflect"
        sync "sync"
        unsafe "unsafe"
)

// Compile-time version checks between this generated file (generation
// version 20) and the linked protoimpl runtime.
const (
        // Verify that this generated code is sufficiently up-to-date.
        _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
        // Verify that runtime/protoimpl is sufficiently up-to-date.
        _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)

// CreateReadSessionRequest is the request message of the
// StorageRead.CreateReadSession RPC.
type CreateReadSessionRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // BigQuery resource path the session belongs to: `projects/{project_id}`.
        // The gateway derives this from the REST URL path so different
        // projects do not see each other's sessions.
        Parent string `protobuf:"bytes,1,opt,name=parent,proto3" json:"parent,omitempty"`
        // Session shape the caller wants to read. `read_session.table` names
        // the table; `read_options.selected_fields` and
        // `read_options.row_restriction` are validated here and applied when
        // the streams are drained.
        ReadSession *ReadSession `protobuf:"bytes,2,opt,name=read_session,json=readSession,proto3" json:"read_session,omitempty"`
        // Maximum number of streams the caller is willing to drain in
        // parallel. When unset or zero the engine returns one stream. The
        // server may return fewer streams than requested depending on table
        // size (each stream must cover at least one row when possible).
        MaxStreamCount int32 `protobuf:"varint,3,opt,name=max_stream_count,json=maxStreamCount,proto3" json:"max_stream_count,omitempty"`
        unknownFields  protoimpl.UnknownFields
        sizeCache      protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *CreateReadSessionRequest) Reset() {
        *x = CreateReadSessionRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[0])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *CreateReadSessionRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*CreateReadSessionRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *CreateReadSessionRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[0]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{0}.
//
// Deprecated: Use CreateReadSessionRequest.ProtoReflect.Descriptor instead.
func (*CreateReadSessionRequest) Descriptor() ([]byte, []int) {
        index := []int{0}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetParent returns the Parent field, or "" for a nil receiver.
func (x *CreateReadSessionRequest) GetParent() string {
        if x == nil {
                return ""
        }
        return x.Parent
}

// GetReadSession returns the ReadSession field, or nil for a nil receiver.
func (x *CreateReadSessionRequest) GetReadSession() *ReadSession {
        if x == nil {
                return nil
        }
        return x.ReadSession
}

// GetMaxStreamCount returns the MaxStreamCount field, or 0 for a nil
// receiver.
func (x *CreateReadSessionRequest) GetMaxStreamCount() int32 {
        if x == nil {
                return 0
        }
        return x.MaxStreamCount
}

// ReadSession describes a read session. It is nested in
// CreateReadSessionRequest and is the reply type of the
// StorageRead.CreateReadSession RPC.
type ReadSession struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Server-assigned session id, of the form
        // `projects/{project_id}/locations/{location}/sessions/{session_id}`.
        // The gateway returns this in the REST `name` field so a follow-up
        // `ReadRows` call can find the session again.
        Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        // Fully-qualified BigQuery table id the session is reading from:
        // `projects/{project_id}/datasets/{dataset_id}/tables/{table_id}`.
        // Set on the request side (caller specifies which table to read);
        // echoed back on the response side so the caller does not have to
        // re-derive it.
        Table string `protobuf:"bytes,2,opt,name=table,proto3" json:"table,omitempty"`
        // Schema of the rows the streams will emit. Populated by the engine
        // from `Storage::GetSchema` so the caller can decode the cells the
        // same way it decodes `tabledata.list` rows.
        Schema *TableSchema `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"`
        // Subset of fields to read; the engine validates the list at
        // session creation and projects rows to these columns on ReadRows.
        ReadOptions *ReadOptions `protobuf:"bytes,4,opt,name=read_options,json=readOptions,proto3" json:"read_options,omitempty"`
        // Streams the caller can drain. Each one is independent and any
        // single row in `table` appears in exactly one stream. The engine
        // always returns a single stream with the full table.
        Streams       []*ReadStream `protobuf:"bytes,5,rep,name=streams,proto3" json:"streams,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *ReadSession) Reset() {
        *x = ReadSession{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[1])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ReadSession) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ReadSession) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *ReadSession) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[1]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{1}.
//
// Deprecated: Use ReadSession.ProtoReflect.Descriptor instead.
func (*ReadSession) Descriptor() ([]byte, []int) {
        index := []int{1}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetName returns the Name field, or "" for a nil receiver.
func (x *ReadSession) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// GetTable returns the Table field, or "" for a nil receiver.
func (x *ReadSession) GetTable() string {
        if x == nil {
                return ""
        }
        return x.Table
}

// GetSchema returns the Schema field, or nil for a nil receiver.
func (x *ReadSession) GetSchema() *TableSchema {
        if x == nil {
                return nil
        }
        return x.Schema
}

// GetReadOptions returns the ReadOptions field, or nil for a nil receiver.
func (x *ReadSession) GetReadOptions() *ReadOptions {
        if x == nil {
                return nil
        }
        return x.ReadOptions
}

// GetStreams returns the Streams field, or nil for a nil receiver.
func (x *ReadSession) GetStreams() []*ReadStream {
        if x == nil {
                return nil
        }
        return x.Streams
}

// ReadOptions carries the column projection and row filter of a
// ReadSession (its `read_options` field).
type ReadOptions struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Names of the columns the caller wants returned. Empty / unset
        // means "all columns". Non-empty lists are validated at session
        // creation and rows are projected to exactly these columns.
        SelectedFields []string `protobuf:"bytes,1,rep,name=selected_fields,json=selectedFields,proto3" json:"selected_fields,omitempty"`
        // SQL-shaped filter expression the engine pushes down. The
        // restriction is analyzed with GoogleSQL against the table schema
        // and transpiled into a DuckDB `WHERE` clause; unsupported shapes
        // reject at CreateReadSession with INVALID_ARGUMENT. The predicate
        // is applied before `offset`, matching BigQuery's documented
        // semantics.
        RowRestriction string `protobuf:"bytes,2,opt,name=row_restriction,json=rowRestriction,proto3" json:"row_restriction,omitempty"`
        unknownFields  protoimpl.UnknownFields
        sizeCache      protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *ReadOptions) Reset() {
        *x = ReadOptions{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[2])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ReadOptions) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ReadOptions) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *ReadOptions) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[2]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{2}.
//
// Deprecated: Use ReadOptions.ProtoReflect.Descriptor instead.
func (*ReadOptions) Descriptor() ([]byte, []int) {
        index := []int{2}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetSelectedFields returns the SelectedFields field, or nil for a nil
// receiver.
func (x *ReadOptions) GetSelectedFields() []string {
        if x == nil {
                return nil
        }
        return x.SelectedFields
}

// GetRowRestriction returns the RowRestriction field, or "" for a nil
// receiver.
func (x *ReadOptions) GetRowRestriction() string {
        if x == nil {
                return ""
        }
        return x.RowRestriction
}

// ReadStream identifies one stream of a ReadSession. It is also the type
// of both streams in SplitReadStreamResponse.
type ReadStream struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Server-assigned stream id of the form
        // `{session_name}/streams/{stream_id}`. The caller passes this
        // back to `ReadRows` to drain rows off this stream.
        Name          string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *ReadStream) Reset() {
        *x = ReadStream{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[3])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ReadStream) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ReadStream) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *ReadStream) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[3]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{3}.
//
// Deprecated: Use ReadStream.ProtoReflect.Descriptor instead.
func (*ReadStream) Descriptor() ([]byte, []int) {
        index := []int{3}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetName returns the Name field, or "" for a nil receiver.
func (x *ReadStream) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// ReadRowsRequest is the request message of the StorageRead.ReadRows RPC.
type ReadRowsRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Stream id returned by `CreateReadSession`. The engine looks the
        // session up by stripping the trailing `/streams/{id}` and finding
        // the matching `ReadSession`.
        ReadStream string `protobuf:"bytes,1,opt,name=read_stream,json=readStream,proto3" json:"read_stream,omitempty"`
        // Row offset to resume from. The gateway uses this to re-attach to
        // a stream after a transient failure without re-driving rows the
        // client already received. The engine honors the offset (counted
        // over the post-filter row stream).
        Offset        int64 `protobuf:"varint,2,opt,name=offset,proto3" json:"offset,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *ReadRowsRequest) Reset() {
        *x = ReadRowsRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[4])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ReadRowsRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ReadRowsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *ReadRowsRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[4]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{4}.
//
// Deprecated: Use ReadRowsRequest.ProtoReflect.Descriptor instead.
func (*ReadRowsRequest) Descriptor() ([]byte, []int) {
        index := []int{4}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetReadStream returns the ReadStream field, or "" for a nil receiver.
func (x *ReadRowsRequest) GetReadStream() string {
        if x == nil {
                return ""
        }
        return x.ReadStream
}

// GetOffset returns the Offset field, or 0 for a nil receiver.
func (x *ReadRowsRequest) GetOffset() int64 {
        if x == nil {
                return 0
        }
        return x.Offset
}

// ReadRowsResponse is the message streamed back by the
// StorageRead.ReadRows RPC.
type ReadRowsResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Rows on this page of the stream. The engine streams rows in
        // fixed-size pages (see `kReadRowsBatchSize` in
        // `frontend/handlers/storage_read.cc`).
        Rows []*DataRow `protobuf:"bytes,1,rep,name=rows,proto3" json:"rows,omitempty"`
        // Number of rows in this page. Convenience field so callers do not
        // have to count `rows.size()` on every reply.
        RowCount      int64 `protobuf:"varint,2,opt,name=row_count,json=rowCount,proto3" json:"row_count,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *ReadRowsResponse) Reset() {
        *x = ReadRowsResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[5])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *ReadRowsResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*ReadRowsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *ReadRowsResponse) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[5]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{5}.
//
// Deprecated: Use ReadRowsResponse.ProtoReflect.Descriptor instead.
func (*ReadRowsResponse) Descriptor() ([]byte, []int) {
        index := []int{5}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetRows returns the Rows field, or nil for a nil receiver.
func (x *ReadRowsResponse) GetRows() []*DataRow {
        if x == nil {
                return nil
        }
        return x.Rows
}

// GetRowCount returns the RowCount field, or 0 for a nil receiver.
func (x *ReadRowsResponse) GetRowCount() int64 {
        if x == nil {
                return 0
        }
        return x.RowCount
}

// SplitReadStreamRequest is the request message of the
// StorageRead.SplitReadStream RPC.
type SplitReadStreamRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Stream id to split (`{session_name}/streams/{id}`).
        Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        // Fraction in (0.0, 1.0) of the stream's remaining row range at
        // which to split. Values outside that open interval are rejected.
        Fraction      float64 `protobuf:"fixed64,2,opt,name=fraction,proto3" json:"fraction,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *SplitReadStreamRequest) Reset() {
        *x = SplitReadStreamRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[6])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SplitReadStreamRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*SplitReadStreamRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *SplitReadStreamRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[6]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{6}.
//
// Deprecated: Use SplitReadStreamRequest.ProtoReflect.Descriptor instead.
func (*SplitReadStreamRequest) Descriptor() ([]byte, []int) {
        index := []int{6}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetName returns the Name field, or "" for a nil receiver.
func (x *SplitReadStreamRequest) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// GetFraction returns the Fraction field, or 0 for a nil receiver.
func (x *SplitReadStreamRequest) GetFraction() float64 {
        if x == nil {
                return 0
        }
        return x.Fraction
}

// SplitReadStreamResponse is the reply message of the
// StorageRead.SplitReadStream RPC.
type SplitReadStreamResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Head portion of the split stream's remaining range.
        PrimaryStream *ReadStream `protobuf:"bytes,1,opt,name=primary_stream,json=primaryStream,proto3" json:"primary_stream,omitempty"`
        // Tail portion of the split stream's remaining range.
        RemainderStream *ReadStream `protobuf:"bytes,2,opt,name=remainder_stream,json=remainderStream,proto3" json:"remainder_stream,omitempty"`
        unknownFields   protoimpl.UnknownFields
        sizeCache       protoimpl.SizeCache
}

// Reset overwrites x with a zero-valued message and stores this type's
// message info on its state.
func (x *SplitReadStreamResponse) Reset() {
        *x = SplitReadStreamResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_read_proto_msgTypes[7])
}

// String returns the text produced by protoimpl.X.MessageStringOf for x.
func (x *SplitReadStreamResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage is an empty marker method.
func (*SplitReadStreamResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver
// the message info is stored on the state if none has been loaded yet; a
// nil receiver is delegated to the message info's MessageOf.
func (x *SplitReadStreamResponse) ProtoReflect() protoreflect.Message {
        info := &file_storage_read_proto_msgTypes[7]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                state.StoreMessageInfo(info)
        }
        return state
}

// Descriptor returns the compressed raw descriptor of storage_read.proto
// together with the index []int{7}.
//
// Deprecated: Use SplitReadStreamResponse.ProtoReflect.Descriptor instead.
func (*SplitReadStreamResponse) Descriptor() ([]byte, []int) {
        index := []int{7}
        return file_storage_read_proto_rawDescGZIP(), index
}

// GetPrimaryStream returns the PrimaryStream field, or nil for a nil
// receiver.
func (x *SplitReadStreamResponse) GetPrimaryStream() *ReadStream {
        if x == nil {
                return nil
        }
        return x.PrimaryStream
}

// GetRemainderStream returns the RemainderStream field, or nil for a nil
// receiver.
func (x *SplitReadStreamResponse) GetRemainderStream() *ReadStream {
        if x == nil {
                return nil
        }
        return x.RemainderStream
}

// File_storage_read_proto is the file descriptor for storage_read.proto.
// It is nil until file_storage_read_proto_init assigns it.
var File_storage_read_proto protoreflect.FileDescriptor

// file_storage_read_proto_rawDesc holds the raw descriptor bytes of
// storage_read.proto as a string constant. It is handed to the type
// builder as RawDescriptor and is the input that
// file_storage_read_proto_rawDescGZIP compresses. Do not edit by hand.
const file_storage_read_proto_rawDesc = "" +
        "\n" +
        "\x12storage_read.proto\x12\x14bigquery_emulator.v1\x1a\x0eemulator.proto\"\xa2\x01\n" +
        "\x18CreateReadSessionRequest\x12\x16\n" +
        "\x06parent\x18\x01 \x01(\tR\x06parent\x12D\n" +
        "\fread_session\x18\x02 \x01(\v2!.bigquery_emulator.v1.ReadSessionR\vreadSession\x12(\n" +
        "\x10max_stream_count\x18\x03 \x01(\x05R\x0emaxStreamCount\"\xf4\x01\n" +
        "\vReadSession\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n" +
        "\x05table\x18\x02 \x01(\tR\x05table\x129\n" +
        "\x06schema\x18\x03 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12D\n" +
        "\fread_options\x18\x04 \x01(\v2!.bigquery_emulator.v1.ReadOptionsR\vreadOptions\x12:\n" +
        "\astreams\x18\x05 \x03(\v2 .bigquery_emulator.v1.ReadStreamR\astreams\"_\n" +
        "\vReadOptions\x12'\n" +
        "\x0fselected_fields\x18\x01 \x03(\tR\x0eselectedFields\x12'\n" +
        "\x0frow_restriction\x18\x02 \x01(\tR\x0erowRestriction\" \n" +
        "\n" +
        "ReadStream\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\"J\n" +
        "\x0fReadRowsRequest\x12\x1f\n" +
        "\vread_stream\x18\x01 \x01(\tR\n" +
        "readStream\x12\x16\n" +
        "\x06offset\x18\x02 \x01(\x03R\x06offset\"b\n" +
        "\x10ReadRowsResponse\x121\n" +
        "\x04rows\x18\x01 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\x12\x1b\n" +
        "\trow_count\x18\x02 \x01(\x03R\browCount\"H\n" +
        "\x16SplitReadStreamRequest\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\x12\x1a\n" +
        "\bfraction\x18\x02 \x01(\x01R\bfraction\"\xaf\x01\n" +
        "\x17SplitReadStreamResponse\x12G\n" +
        "\x0eprimary_stream\x18\x01 \x01(\v2 .bigquery_emulator.v1.ReadStreamR\rprimaryStream\x12K\n" +
        "\x10remainder_stream\x18\x02 \x01(\v2 .bigquery_emulator.v1.ReadStreamR\x0fremainderStream2\xc2\x02\n" +
        "\vStorageRead\x12f\n" +
        "\x11CreateReadSession\x12..bigquery_emulator.v1.CreateReadSessionRequest\x1a!.bigquery_emulator.v1.ReadSession\x12[\n" +
        "\bReadRows\x12%.bigquery_emulator.v1.ReadRowsRequest\x1a&.bigquery_emulator.v1.ReadRowsResponse0\x01\x12n\n" +
        "\x0fSplitReadStream\x12,.bigquery_emulator.v1.SplitReadStreamRequest\x1a-.bigquery_emulator.v1.SplitReadStreamResponseBFZAgithub.com/vantaboard/bigquery-emulator/gateway/enginepb;enginepb\xf8\x01\x01b\x06proto3"

// Lazily computed, cached result of file_storage_read_proto_rawDescGZIP.
var (
        // Guards the one-time compression of the raw descriptor.
        file_storage_read_proto_rawDescOnce sync.Once
        // Compressed descriptor bytes; written inside the Once callback.
        file_storage_read_proto_rawDescData []byte
)

func file_storage_read_proto_rawDescGZIP() []byte {
        file_storage_read_proto_rawDescOnce.Do(func() {
                file_storage_read_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_storage_read_proto_rawDesc), len(file_storage_read_proto_rawDesc)))
        })
        return file_storage_read_proto_rawDescData
}

// file_storage_read_proto_msgTypes holds one MessageInfo slot per message
// declared in storage_read.proto; the generated methods index into it.
var file_storage_read_proto_msgTypes = make([]protoimpl.MessageInfo, 8)

// file_storage_read_proto_goTypes lists the Go types referenced by the
// descriptor, addressed by the indices in file_storage_read_proto_depIdxs.
// Entries 0-7 line up with file_storage_read_proto_msgTypes; entries 8-9
// are message types not declared in this file (presumably from
// emulator.proto — confirm). Set to nil once the type builder has run.
var file_storage_read_proto_goTypes = []any{
        (*CreateReadSessionRequest)(nil), // 0: bigquery_emulator.v1.CreateReadSessionRequest
        (*ReadSession)(nil),              // 1: bigquery_emulator.v1.ReadSession
        (*ReadOptions)(nil),              // 2: bigquery_emulator.v1.ReadOptions
        (*ReadStream)(nil),               // 3: bigquery_emulator.v1.ReadStream
        (*ReadRowsRequest)(nil),          // 4: bigquery_emulator.v1.ReadRowsRequest
        (*ReadRowsResponse)(nil),         // 5: bigquery_emulator.v1.ReadRowsResponse
        (*SplitReadStreamRequest)(nil),   // 6: bigquery_emulator.v1.SplitReadStreamRequest
        (*SplitReadStreamResponse)(nil),  // 7: bigquery_emulator.v1.SplitReadStreamResponse
        (*TableSchema)(nil),              // 8: bigquery_emulator.v1.TableSchema
        (*DataRow)(nil),                  // 9: bigquery_emulator.v1.DataRow
}
// file_storage_read_proto_depIdxs is the dependency index table passed to
// the type builder. Each of the first 13 entries is an index into
// file_storage_read_proto_goTypes: entries 0-6 resolve message-typed
// fields, 7-9 the method input types and 10-12 the method output types.
// The last five entries are the start offsets of those sub-lists, as
// labelled inline. Set to nil once the type builder has run.
var file_storage_read_proto_depIdxs = []int32{
        1,  // 0: bigquery_emulator.v1.CreateReadSessionRequest.read_session:type_name -> bigquery_emulator.v1.ReadSession
        8,  // 1: bigquery_emulator.v1.ReadSession.schema:type_name -> bigquery_emulator.v1.TableSchema
        2,  // 2: bigquery_emulator.v1.ReadSession.read_options:type_name -> bigquery_emulator.v1.ReadOptions
        3,  // 3: bigquery_emulator.v1.ReadSession.streams:type_name -> bigquery_emulator.v1.ReadStream
        9,  // 4: bigquery_emulator.v1.ReadRowsResponse.rows:type_name -> bigquery_emulator.v1.DataRow
        3,  // 5: bigquery_emulator.v1.SplitReadStreamResponse.primary_stream:type_name -> bigquery_emulator.v1.ReadStream
        3,  // 6: bigquery_emulator.v1.SplitReadStreamResponse.remainder_stream:type_name -> bigquery_emulator.v1.ReadStream
        0,  // 7: bigquery_emulator.v1.StorageRead.CreateReadSession:input_type -> bigquery_emulator.v1.CreateReadSessionRequest
        4,  // 8: bigquery_emulator.v1.StorageRead.ReadRows:input_type -> bigquery_emulator.v1.ReadRowsRequest
        6,  // 9: bigquery_emulator.v1.StorageRead.SplitReadStream:input_type -> bigquery_emulator.v1.SplitReadStreamRequest
        1,  // 10: bigquery_emulator.v1.StorageRead.CreateReadSession:output_type -> bigquery_emulator.v1.ReadSession
        5,  // 11: bigquery_emulator.v1.StorageRead.ReadRows:output_type -> bigquery_emulator.v1.ReadRowsResponse
        7,  // 12: bigquery_emulator.v1.StorageRead.SplitReadStream:output_type -> bigquery_emulator.v1.SplitReadStreamResponse
        10, // [10:13] is the sub-list for method output_type
        7,  // [7:10] is the sub-list for method input_type
        7,  // [7:7] is the sub-list for extension type_name
        7,  // [7:7] is the sub-list for extension extendee
        0,  // [0:7] is the sub-list for field type_name
}

// init builds the storage_read.proto descriptor when the package loads.
func init() {
        file_storage_read_proto_init()
}
// file_storage_read_proto_init builds the storage_read.proto descriptor
// and stores it in File_storage_read_proto. It returns immediately when
// the descriptor is already set, and otherwise runs
// file_emulator_proto_init first. Once the build is done the goTypes and
// depIdxs tables are set to nil.
func file_storage_read_proto_init() {
        if File_storage_read_proto != nil {
                return
        }
        file_emulator_proto_init()

        // A locally declared type is used only to obtain this package's path.
        type pkgAnchor struct{}
        rawDescriptor := unsafe.Slice(unsafe.StringData(file_storage_read_proto_rawDesc), len(file_storage_read_proto_rawDesc))
        fileBuilder := protoimpl.DescBuilder{
                GoPackagePath: reflect.TypeOf(pkgAnchor{}).PkgPath(),
                RawDescriptor: rawDescriptor,
                NumEnums:      0,
                NumMessages:   8,
                NumExtensions: 0,
                NumServices:   1,
        }
        built := protoimpl.TypeBuilder{
                File:              fileBuilder,
                GoTypes:           file_storage_read_proto_goTypes,
                DependencyIndexes: file_storage_read_proto_depIdxs,
                MessageInfos:      file_storage_read_proto_msgTypes,
        }.Build()

        File_storage_read_proto = built.File
        file_storage_read_proto_goTypes = nil
        file_storage_read_proto_depIdxs = nil
}

// storage_read.proto is the internal contract for the BigQuery Storage
// Read API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.list` reads against large
// tables into Storage Read RPCs against this service so the C++ engine
// can stream rows back without the entire result set living in the
// gateway's heap.
//
// The engine implements `CreateReadSession` and the streaming
// `ReadRows` reply, including per-column projection
// (`selected_fields`) and `row_restriction` pushdown, and the gateway
// is wired to it (with e2e coverage under `gateway/e2e/`). The shape
// here is the **simplified** Storage Read v1 contract: no Arrow/Avro
// projections (rows ride on the same `DataRow` cells that
// `Catalog.ListRows` already returns) and no session liveness
// extension. Those are documented as "future" so the conformance
// harness can pin per-feature gaps. A `SplitReadStream` RPC is
// declared on the service in addition to the two RPCs named above.
//
// Code generation:
//   - Go:  `task proto:gen` writes
//          `gateway/enginepb/storage_read.{pb,grpc.pb}.go`. Same
//          plugin pipeline as `emulator.proto`.
//   - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
//          `proto/BUILD.bazel` emit
//          `storage_read.{pb,grpc.pb}.{h,cc}` into bazel-bin.

// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.2
// - protoc             (unknown)
// source: storage_read.proto

package enginepb

import (
        context "context"
        grpc "google.golang.org/grpc"
        codes "google.golang.org/grpc/codes"
        status "google.golang.org/grpc/status"
)

// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
// The build fails if the linked grpc package does not define the
// referenced constant.
const _ = grpc.SupportPackageIsVersion9

// Fully-qualified gRPC method names of the StorageRead service, in the
// form "/<package>.<service>/<method>". The generated client passes them
// to Invoke and NewStream.
const (
        StorageRead_CreateReadSession_FullMethodName = "/bigquery_emulator.v1.StorageRead/CreateReadSession"
        StorageRead_ReadRows_FullMethodName          = "/bigquery_emulator.v1.StorageRead/ReadRows"
        StorageRead_SplitReadStream_FullMethodName   = "/bigquery_emulator.v1.StorageRead/SplitReadStream"
)

// StorageReadClient is the client API for StorageRead service.
//
// CreateReadSession and SplitReadStream are unary calls; ReadRows returns
// a server-streaming client that yields ReadRowsResponse messages.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type StorageReadClient interface {
        // CreateReadSession validates the request, materializes a session
        // pinning the table + schema, and returns the session handle the
        // caller will pass back to `ReadRows`. The reply carries the table
        // schema verbatim so the caller does not need a follow-up
        // `Catalog.DescribeTable` round-trip before starting to read.
        CreateReadSession(ctx context.Context, in *CreateReadSessionRequest, opts ...grpc.CallOption) (*ReadSession, error)
        // ReadRows streams rows off the named stream. The stream id must be
        // one of the `ReadSession.streams[*].name` values returned by the
        // matching `CreateReadSession` call (or a child stream minted by
        // `SplitReadStream`).
        ReadRows(ctx context.Context, in *ReadRowsRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ReadRowsResponse], error)
        // SplitReadStream subdivides an existing stream's remaining row
        // range into a primary stream (head) and a residual stream (tail).
        SplitReadStream(ctx context.Context, in *SplitReadStreamRequest, opts ...grpc.CallOption) (*SplitReadStreamResponse, error)
}

type storageReadClient struct {
        cc grpc.ClientConnInterface
}

func NewStorageReadClient(cc grpc.ClientConnInterface) StorageReadClient {
        return &storageReadClient{cc}
}

func (c *storageReadClient) CreateReadSession(ctx context.Context, in *CreateReadSessionRequest, opts ...grpc.CallOption) (*ReadSession, error) {
        cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        out := new(ReadSession)
        err := c.cc.Invoke(ctx, StorageRead_CreateReadSession_FullMethodName, in, out, cOpts...)
        if err != nil {
                return nil, err
        }
        return out, nil
}

// ReadRows opens the server-streaming ReadRows RPC, sends the single request
// message, half-closes the send side, and hands back a typed stream from which
// the caller receives ReadRowsResponse messages.
//
// The stream descriptor is taken from StorageRead_ServiceDesc.Streams[0]. Any
// error from opening the stream, sending the request, or closing the send side
// is returned with a nil stream.
func (c *storageReadClient) ReadRows(ctx context.Context, in *ReadRowsRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[ReadRowsResponse], error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        raw, err := c.cc.NewStream(ctx, &StorageRead_ServiceDesc.Streams[0], StorageRead_ReadRows_FullMethodName, callOpts...)
        if err != nil {
                return nil, err
        }
        // The request is the only client-to-server message on this RPC, so the
        // send side is closed right after it goes out.
        if err = raw.SendMsg(in); err != nil {
                return nil, err
        }
        if err = raw.CloseSend(); err != nil {
                return nil, err
        }
        return &grpc.GenericClientStream[ReadRowsRequest, ReadRowsResponse]{ClientStream: raw}, nil
}

// StorageRead_ReadRowsClient is a type alias kept for backwards compatibility
// with existing code that references the prior non-generic stream type by name.
type StorageRead_ReadRowsClient = grpc.ServerStreamingClient[ReadRowsResponse]

// SplitReadStream issues the unary SplitReadStream RPC over the wrapped
// connection. grpc.StaticMethod() is placed ahead of the caller's options. On
// failure the reply is discarded and only the error is returned.
func (c *storageReadClient) SplitReadStream(ctx context.Context, in *SplitReadStreamRequest, opts ...grpc.CallOption) (*SplitReadStreamResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        reply := &SplitReadStreamResponse{}
        if err := c.cc.Invoke(ctx, StorageRead_SplitReadStream_FullMethodName, in, reply, callOpts...); err != nil {
                return nil, err
        }
        return reply, nil
}

// StorageReadServer is the server API for StorageRead service.
// All implementations should embed UnimplementedStorageReadServer
// for forward compatibility.
//
// Implementations are registered on a gRPC server with
// RegisterStorageReadServer.
type StorageReadServer interface {
        // CreateReadSession validates the request, materializes a session
        // pinning the table + schema, and returns the session handle the
        // caller will pass back to `ReadRows`. The reply carries the table
        // schema verbatim so the caller does not need a follow-up
        // `Catalog.DescribeTable` round-trip before starting to read.
        CreateReadSession(context.Context, *CreateReadSessionRequest) (*ReadSession, error)
        // ReadRows streams rows off the named stream. The stream id must be
        // one of the `ReadSession.streams[*].name` values returned by the
        // matching `CreateReadSession` call (or a child stream minted by
        // `SplitReadStream`).
        //
        // The request has already been received when this is called; the
        // implementation writes ReadRowsResponse messages to the supplied
        // server stream and returns the RPC's final error.
        ReadRows(*ReadRowsRequest, grpc.ServerStreamingServer[ReadRowsResponse]) error
        // SplitReadStream subdivides an existing stream's remaining row
        // range into a primary stream (head) and a residual stream (tail).
        SplitReadStream(context.Context, *SplitReadStreamRequest) (*SplitReadStreamResponse, error)
}

// UnimplementedStorageReadServer should be embedded to have
// forward compatible implementations. Each RPC method it provides
// answers with a codes.Unimplemented status.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedStorageReadServer struct{}

// CreateReadSession always fails with codes.Unimplemented.
func (UnimplementedStorageReadServer) CreateReadSession(context.Context, *CreateReadSessionRequest) (*ReadSession, error) {
        err := status.Error(codes.Unimplemented, "method CreateReadSession not implemented")
        return nil, err
}

// ReadRows always fails with codes.Unimplemented.
func (UnimplementedStorageReadServer) ReadRows(*ReadRowsRequest, grpc.ServerStreamingServer[ReadRowsResponse]) error {
        err := status.Error(codes.Unimplemented, "method ReadRows not implemented")
        return err
}

// SplitReadStream always fails with codes.Unimplemented.
func (UnimplementedStorageReadServer) SplitReadStream(context.Context, *SplitReadStreamRequest) (*SplitReadStreamResponse, error) {
        err := status.Error(codes.Unimplemented, "method SplitReadStream not implemented")
        return nil, err
}

// testEmbeddedByValue is a no-op that RegisterStorageReadServer calls to
// detect a nil embedded pointer at registration time.
func (UnimplementedStorageReadServer) testEmbeddedByValue() {}

// UnsafeStorageReadServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to StorageReadServer will
// result in compilation errors.
//
// NOTE(review): within this section UnimplementedStorageReadServer does not
// define mustEmbedUnimplementedStorageReadServer, so embedding it would not
// by itself satisfy this interface — confirm against the rest of the file.
type UnsafeStorageReadServer interface {
        mustEmbedUnimplementedStorageReadServer()
}

// RegisterStorageReadServer registers srv on s as the implementation of the
// StorageRead service described by StorageRead_ServiceDesc.
func RegisterStorageReadServer(s grpc.ServiceRegistrar, srv StorageReadServer) {
        // embedProbe matches implementations that embed
        // UnimplementedStorageReadServer.
        type embedProbe interface{ testEmbeddedByValue() }

        // If the following call panics, it indicates UnimplementedStorageReadServer was
        // embedded by pointer and is nil.  This will cause panics if an
        // unimplemented method is ever invoked, so we test this at initialization
        // time to prevent it from happening at runtime later due to I/O.
        if probe, ok := srv.(embedProbe); ok {
                probe.testEmbeddedByValue()
        }
        s.RegisterService(&StorageRead_ServiceDesc, srv)
}

// _StorageRead_CreateReadSession_Handler is the unary server handler for
// CreateReadSession. It decodes the request with dec and then either calls the
// service implementation directly (no interceptor) or routes the call through
// interceptor, passing it the method's full name and a closure that performs
// the actual invocation.
func _StorageRead_CreateReadSession_Handler(srv any, ctx context.Context, dec func(any) error, interceptor grpc.UnaryServerInterceptor) (any, error) {
        req := new(CreateReadSessionRequest)
        if err := dec(req); err != nil {
                return nil, err
        }
        if interceptor != nil {
                invoke := func(ctx context.Context, msg any) (any, error) {
                        return srv.(StorageReadServer).CreateReadSession(ctx, msg.(*CreateReadSessionRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: StorageRead_CreateReadSession_FullMethodName,
                }, invoke)
        }
        return srv.(StorageReadServer).CreateReadSession(ctx, req)
}

// _StorageRead_ReadRows_Handler is the streaming server handler for ReadRows.
// It receives the single request message from stream, wraps stream in the
// typed generic server stream, and delegates to the service implementation.
// A receive failure is returned without invoking the implementation.
func _StorageRead_ReadRows_Handler(srv any, stream grpc.ServerStream) error {
        req := new(ReadRowsRequest)
        if err := stream.RecvMsg(req); err != nil {
                return err
        }
        typed := &grpc.GenericServerStream[ReadRowsRequest, ReadRowsResponse]{ServerStream: stream}
        return srv.(StorageReadServer).ReadRows(req, typed)
}

// StorageRead_ReadRowsServer is a type alias kept for backwards compatibility
// with existing code that references the prior non-generic stream type by name.
type StorageRead_ReadRowsServer = grpc.ServerStreamingServer[ReadRowsResponse]

// _StorageRead_SplitReadStream_Handler is the unary server handler for
// SplitReadStream. It decodes the request with dec and then either calls the
// service implementation directly (no interceptor) or routes the call through
// interceptor, passing it the method's full name and a closure that performs
// the actual invocation.
func _StorageRead_SplitReadStream_Handler(srv any, ctx context.Context, dec func(any) error, interceptor grpc.UnaryServerInterceptor) (any, error) {
        req := new(SplitReadStreamRequest)
        if err := dec(req); err != nil {
                return nil, err
        }
        if interceptor != nil {
                invoke := func(ctx context.Context, msg any) (any, error) {
                        return srv.(StorageReadServer).SplitReadStream(ctx, msg.(*SplitReadStreamRequest))
                }
                return interceptor(ctx, req, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: StorageRead_SplitReadStream_FullMethodName,
                }, invoke)
        }
        return srv.(StorageReadServer).SplitReadStream(ctx, req)
}

// StorageRead_ServiceDesc is the grpc.ServiceDesc for StorageRead service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
//
// Methods lists the unary RPCs and Streams lists the server-streaming RPC.
// The generated client's ReadRows refers to its stream descriptor as
// Streams[0], so the position of the ReadRows entry is significant.
var StorageRead_ServiceDesc = grpc.ServiceDesc{
        ServiceName: "bigquery_emulator.v1.StorageRead",
        HandlerType: (*StorageReadServer)(nil),
        Methods: []grpc.MethodDesc{
                {
                        MethodName: "CreateReadSession",
                        Handler:    _StorageRead_CreateReadSession_Handler,
                },
                {
                        MethodName: "SplitReadStream",
                        Handler:    _StorageRead_SplitReadStream_Handler,
                },
        },
        Streams: []grpc.StreamDesc{
                {
                        // Server-streaming only: the client sends one request and
                        // the server replies with a stream of messages.
                        StreamName:    "ReadRows",
                        Handler:       _StorageRead_ReadRows_Handler,
                        ServerStreams: true,
                },
        },
        Metadata: "storage_read.proto",
}

// storage_write.proto is the internal contract for the BigQuery Storage
// Write API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.insertAll` calls (and the
// gRPC AppendRows path used directly by the Storage Write client
// libraries) into RPCs against this service so the C++ engine can
// commit rows through the same `DuckDBStorage::AppendRows` primitive
// the local DML executor already uses.
//
// Storage Read/Write API handlers for `_default`, `COMMITTED`, `BUFFERED`,
// and `PENDING` stream types ship end-to-end; see `ROADMAP.md` for the
// posture matrix.
//
// Code generation:
//   - Go:  `task proto:gen` writes
//          `gateway/enginepb/storage_write.{pb,grpc.pb}.go`. Same
//          plugin pipeline as `emulator.proto`.
//   - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
//          `proto/BUILD.bazel` emit
//          `storage_write.{pb,grpc.pb}.{h,cc}` into bazel-bin.

// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
//         protoc-gen-go v1.36.11
//         protoc        (unknown)
// source: storage_write.proto

package enginepb

import (
        protoreflect "google.golang.org/protobuf/reflect/protoreflect"
        protoimpl "google.golang.org/protobuf/runtime/protoimpl"
        reflect "reflect"
        sync "sync"
        unsafe "unsafe"
)

// Version checks between this generated file and the linked protobuf runtime.
// Both results are assigned to the blank identifier; the declarations exist
// only for the check itself (presumably a build failure on mismatch — see the
// protoimpl.EnforceVersion documentation).
const (
        // Verify that this generated code is sufficiently up-to-date.
        _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
        // Verify that runtime/protoimpl is sufficiently up-to-date.
        _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)

// WriteStream_Type enumerates BigQuery's stream lifecycle vocabulary.
//
// NOTE(review): the per-value comments below say CreateWriteStream returns
// UNIMPLEMENTED for PENDING and BUFFERED, while the storage_write.proto file
// header says those stream types ship end-to-end — confirm which is current.
type WriteStream_Type int32

const (
        // Reserved zero-value for proto3. Server treats this as
        // COMMITTED (matching BigQuery's documented default).
        WriteStream_TYPE_UNSPECIFIED WriteStream_Type = 0
        // `_default` and explicit COMMITTED both commit on every
        // AppendRows batch (rows immediately visible to readers).
        // The emulator supports both.
        WriteStream_COMMITTED WriteStream_Type = 1
        // PENDING streams buffer rows server-side until
        // `BatchCommitWriteStreams` makes them visible. Reserved for
        // the deferred follow-up; CreateWriteStream returns
        // UNIMPLEMENTED today.
        WriteStream_PENDING WriteStream_Type = 2
        // BUFFERED streams buffer rows server-side until `FlushRows`
        // advances the visibility offset. Reserved for the deferred
        // follow-up; CreateWriteStream returns UNIMPLEMENTED today.
        WriteStream_BUFFERED WriteStream_Type = 3
)

// Enum value maps for WriteStream_Type: number-to-name and name-to-number.
var (
        WriteStream_Type_name = map[int32]string{
                0: "TYPE_UNSPECIFIED",
                1: "COMMITTED",
                2: "PENDING",
                3: "BUFFERED",
        }
        WriteStream_Type_value = map[string]int32{
                "TYPE_UNSPECIFIED": 0,
                "COMMITTED":        1,
                "PENDING":          2,
                "BUFFERED":         3,
        }
)

// Enum returns a pointer to a fresh copy of x.
func (x WriteStream_Type) Enum() *WriteStream_Type {
        // x is the receiver's own copy, so its address is a new allocation.
        return &x
}

// String returns the protobuf runtime's string form of x, looked up through
// the enum descriptor.
func (x WriteStream_Type) String() string {
        desc := x.Descriptor()
        return protoimpl.X.EnumStringOf(desc, x.Number())
}

// Descriptor returns the enum descriptor registered for WriteStream_Type.
func (WriteStream_Type) Descriptor() protoreflect.EnumDescriptor {
        enumInfo := &file_storage_write_proto_enumTypes[0]
        return enumInfo.Descriptor()
}

// Type returns the protoreflect.EnumType for WriteStream_Type.
func (WriteStream_Type) Type() protoreflect.EnumType {
        return &file_storage_write_proto_enumTypes[0]
}

// Number returns x as a protoreflect.EnumNumber.
func (x WriteStream_Type) Number() protoreflect.EnumNumber {
        return protoreflect.EnumNumber(x)
}

// Deprecated: Use WriteStream_Type.Descriptor instead.
func (WriteStream_Type) EnumDescriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{0, 0}
}

// WriteStream describes a write stream by name + type + pinned schema.
// `name` is the server-assigned id of the form
// `{table}/streams/{id}` (or the canonical `_default` reserved name);
// the caller passes it back to `AppendRows`.
type WriteStream struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Server-assigned stream id. Format:
        //
        //        `projects/{p}/datasets/{d}/tables/{t}/streams/{id}`
        //
        // The `_default` stream uses the reserved id `_default`; the
        // engine also accepts that the caller did not call
        // CreateWriteStream first and routes appends to the table's
        // implicit default stream.
        Name string           `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        Type WriteStream_Type `protobuf:"varint,2,opt,name=type,proto3,enum=bigquery_emulator.v1.WriteStream_Type" json:"type,omitempty"`
        // Schema the stream pins at creation time. Engine populates this
        // from the source table's `Storage::GetSchema` reply so the
        // caller can sanity-check writer alignment without a follow-up
        // DescribeTable round-trip.
        Schema *TableSchema `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"`
        // RFC3339 timestamp the stream was minted. The handler stamps
        // this so a follow-up `GetWriteStream` can surface stream
        // age (BigQuery uses it for retention windows).
        CreateTime    string `protobuf:"bytes,4,opt,name=create_time,json=createTime,proto3" json:"create_time,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero WriteStream and re-attaches its message info.
func (x *WriteStream) Reset() {
        *x = WriteStream{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[0])
}

// String returns the protobuf runtime's string rendering of x.
func (x *WriteStream) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks WriteStream as a protobuf message type.
func (*WriteStream) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *WriteStream) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[0]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use WriteStream.ProtoReflect.Descriptor instead.
func (*WriteStream) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{0}
}

// GetName returns Name, or "" when x is nil.
func (x *WriteStream) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// GetType returns Type, or WriteStream_TYPE_UNSPECIFIED when x is nil.
func (x *WriteStream) GetType() WriteStream_Type {
        if x == nil {
                return WriteStream_TYPE_UNSPECIFIED
        }
        return x.Type
}

// GetSchema returns Schema, or nil when x is nil.
func (x *WriteStream) GetSchema() *TableSchema {
        if x == nil {
                return nil
        }
        return x.Schema
}

// GetCreateTime returns CreateTime, or "" when x is nil.
func (x *WriteStream) GetCreateTime() string {
        if x == nil {
                return ""
        }
        return x.CreateTime
}

// CreateWriteStreamRequest names the parent table and the desired stream
// shape for a new write stream.
type CreateWriteStreamRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // BigQuery resource path the stream belongs to:
        // `projects/{p}/datasets/{d}/tables/{t}`.
        Parent string `protobuf:"bytes,1,opt,name=parent,proto3" json:"parent,omitempty"`
        // Stream shape the caller wants. Only `type` is read today;
        // `name` and `schema` are server-populated.
        WriteStream   *WriteStream `protobuf:"bytes,2,opt,name=write_stream,json=writeStream,proto3" json:"write_stream,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero CreateWriteStreamRequest and re-attaches
// its message info.
func (x *CreateWriteStreamRequest) Reset() {
        *x = CreateWriteStreamRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[1])
}

// String returns the protobuf runtime's string rendering of x.
func (x *CreateWriteStreamRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks CreateWriteStreamRequest as a protobuf message type.
func (*CreateWriteStreamRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *CreateWriteStreamRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[1]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use CreateWriteStreamRequest.ProtoReflect.Descriptor instead.
func (*CreateWriteStreamRequest) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{1}
}

// GetParent returns Parent, or "" when x is nil.
func (x *CreateWriteStreamRequest) GetParent() string {
        if x == nil {
                return ""
        }
        return x.Parent
}

// GetWriteStream returns WriteStream, or nil when x is nil.
func (x *CreateWriteStreamRequest) GetWriteStream() *WriteStream {
        if x == nil {
                return nil
        }
        return x.WriteStream
}

// AppendRowsRequest is one message on an AppendRows stream: the target write
// stream, an optional offset, the row payload, and an optional trace id.
type AppendRowsRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // The full stream name (matching `WriteStream.name`). Required on
        // the first message; subsequent messages on the same stream may
        // leave this empty (the handler keeps the first message's
        // binding) or re-assert the same value.
        WriteStream string `protobuf:"bytes,1,opt,name=write_stream,json=writeStream,proto3" json:"write_stream,omitempty"`
        // Optional offset for ordered append. The handler ignores the
        // value for `_default` / `COMMITTED` (every append is immediately
        // committed in arrival order); the field is here so the wire
        // shape is forward-compatible with BUFFERED / PENDING flows.
        Offset    int64                        `protobuf:"varint,2,opt,name=offset,proto3" json:"offset,omitempty"`
        ProtoRows *AppendRowsRequest_ProtoData `protobuf:"bytes,4,opt,name=proto_rows,json=protoRows,proto3" json:"proto_rows,omitempty"`
        // Caller-supplied trace id, mirrored back on the response so
        // the producer can correlate appends with replies. Optional.
        TraceId       string `protobuf:"bytes,6,opt,name=trace_id,json=traceId,proto3" json:"trace_id,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero AppendRowsRequest and re-attaches its
// message info.
func (x *AppendRowsRequest) Reset() {
        *x = AppendRowsRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[2])
}

// String returns the protobuf runtime's string rendering of x.
func (x *AppendRowsRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks AppendRowsRequest as a protobuf message type.
func (*AppendRowsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *AppendRowsRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[2]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use AppendRowsRequest.ProtoReflect.Descriptor instead.
func (*AppendRowsRequest) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{2}
}

// GetWriteStream returns WriteStream, or "" when x is nil.
func (x *AppendRowsRequest) GetWriteStream() string {
        if x == nil {
                return ""
        }
        return x.WriteStream
}

// GetOffset returns Offset, or 0 when x is nil.
func (x *AppendRowsRequest) GetOffset() int64 {
        if x == nil {
                return 0
        }
        return x.Offset
}

// GetProtoRows returns ProtoRows, or nil when x is nil.
func (x *AppendRowsRequest) GetProtoRows() *AppendRowsRequest_ProtoData {
        if x == nil {
                return nil
        }
        return x.ProtoRows
}

// GetTraceId returns TraceId, or "" when x is nil.
func (x *AppendRowsRequest) GetTraceId() string {
        if x == nil {
                return ""
        }
        return x.TraceId
}

// AppendRowsResponse is the reply to one AppendRowsRequest. Its Response
// oneof carries either an append result or an error message.
type AppendRowsResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Either AppendResult or error is set, never both.
        //
        // Types that are valid to be assigned to Response:
        //
        //        *AppendRowsResponse_AppendResult_
        //        *AppendRowsResponse_ErrorMessage
        Response isAppendRowsResponse_Response `protobuf_oneof:"response"`
        // Trace id echoed from the request (when set). Empty if the
        // request did not pin one.
        TraceId string `protobuf:"bytes,3,opt,name=trace_id,json=traceId,proto3" json:"trace_id,omitempty"`
        // Number of rows the engine committed for this request. The
        // public surface does not advertise this on success; we surface
        // it explicitly so the in-process gRPC test can assert per-batch
        // commit counts without re-querying the table.
        RowCount      int64 `protobuf:"varint,4,opt,name=row_count,json=rowCount,proto3" json:"row_count,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero AppendRowsResponse and re-attaches its
// message info.
func (x *AppendRowsResponse) Reset() {
        *x = AppendRowsResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[3])
}

// String returns the protobuf runtime's string rendering of x.
func (x *AppendRowsResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks AppendRowsResponse as a protobuf message type.
func (*AppendRowsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *AppendRowsResponse) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[3]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use AppendRowsResponse.ProtoReflect.Descriptor instead.
func (*AppendRowsResponse) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{3}
}

// GetResponse returns the raw oneof wrapper, or nil when x is nil.
func (x *AppendRowsResponse) GetResponse() isAppendRowsResponse_Response {
        if x == nil {
                return nil
        }
        return x.Response
}

// GetAppendResult returns the append result when the oneof holds the
// AppendResult case; otherwise (including a nil receiver) it returns nil.
func (x *AppendRowsResponse) GetAppendResult() *AppendRowsResponse_AppendResult {
        if x == nil {
                return nil
        }
        if wrapper, ok := x.Response.(*AppendRowsResponse_AppendResult_); ok {
                return wrapper.AppendResult
        }
        return nil
}

// GetErrorMessage returns the error text when the oneof holds the
// ErrorMessage case; otherwise (including a nil receiver) it returns "".
func (x *AppendRowsResponse) GetErrorMessage() string {
        if x == nil {
                return ""
        }
        if wrapper, ok := x.Response.(*AppendRowsResponse_ErrorMessage); ok {
                return wrapper.ErrorMessage
        }
        return ""
}

// GetTraceId returns TraceId, or "" when x is nil.
func (x *AppendRowsResponse) GetTraceId() string {
        if x == nil {
                return ""
        }
        return x.TraceId
}

// GetRowCount returns RowCount, or 0 when x is nil.
func (x *AppendRowsResponse) GetRowCount() int64 {
        if x == nil {
                return 0
        }
        return x.RowCount
}

// isAppendRowsResponse_Response is implemented by the wrapper types that may
// be stored in AppendRowsResponse.Response.
type isAppendRowsResponse_Response interface {
        isAppendRowsResponse_Response()
}

// AppendRowsResponse_AppendResult_ is the oneof wrapper for the append_result
// case.
type AppendRowsResponse_AppendResult_ struct {
        AppendResult *AppendRowsResponse_AppendResult `protobuf:"bytes,1,opt,name=append_result,json=appendResult,proto3,oneof"`
}

// AppendRowsResponse_ErrorMessage is the oneof wrapper for the error_message
// case.
type AppendRowsResponse_ErrorMessage struct {
        // Error message from the storage layer (`DuckDBStorage::AppendRows`
        // failures, schema mismatches, ...). The handler maps absl
        // statuses onto a free-form message here; the public BigQuery
        // surface uses `google.rpc.Status` but we keep the simpler
        // shape so we don't pull `google.rpc.status` in.
        ErrorMessage string `protobuf:"bytes,2,opt,name=error_message,json=errorMessage,proto3,oneof"`
}

func (*AppendRowsResponse_AppendResult_) isAppendRowsResponse_Response() {}

func (*AppendRowsResponse_ErrorMessage) isAppendRowsResponse_Response() {}

// GetWriteStreamRequest identifies a write stream by name.
type GetWriteStreamRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Name          string                 `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero GetWriteStreamRequest and re-attaches its
// message info.
func (x *GetWriteStreamRequest) Reset() {
        *x = GetWriteStreamRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[4])
}

// String returns the protobuf runtime's string rendering of x.
func (x *GetWriteStreamRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks GetWriteStreamRequest as a protobuf message type.
func (*GetWriteStreamRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *GetWriteStreamRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[4]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use GetWriteStreamRequest.ProtoReflect.Descriptor instead.
func (*GetWriteStreamRequest) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{4}
}

// GetName returns Name, or "" when x is nil.
func (x *GetWriteStreamRequest) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// FinalizeWriteStreamRequest identifies, by name, the write stream to finalize.
type FinalizeWriteStreamRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Name          string                 `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero FinalizeWriteStreamRequest and re-attaches
// its message info.
func (x *FinalizeWriteStreamRequest) Reset() {
        *x = FinalizeWriteStreamRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[5])
}

// String returns the protobuf runtime's string rendering of x.
func (x *FinalizeWriteStreamRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks FinalizeWriteStreamRequest as a protobuf message type.
func (*FinalizeWriteStreamRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *FinalizeWriteStreamRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[5]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use FinalizeWriteStreamRequest.ProtoReflect.Descriptor instead.
func (*FinalizeWriteStreamRequest) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{5}
}

// GetName returns Name, or "" when x is nil.
func (x *FinalizeWriteStreamRequest) GetName() string {
        if x == nil {
                return ""
        }
        return x.Name
}

// FinalizeWriteStreamResponse reports the row count of a finalized stream.
type FinalizeWriteStreamResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Total rows committed on the stream. Reserved for the deferred
        // follow-up; the emulator returns UNIMPLEMENTED.
        RowCount      int64 `protobuf:"varint,1,opt,name=row_count,json=rowCount,proto3" json:"row_count,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero FinalizeWriteStreamResponse and re-attaches
// its message info.
func (x *FinalizeWriteStreamResponse) Reset() {
        *x = FinalizeWriteStreamResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[6])
}

// String returns the protobuf runtime's string rendering of x.
func (x *FinalizeWriteStreamResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks FinalizeWriteStreamResponse as a protobuf message type.
func (*FinalizeWriteStreamResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *FinalizeWriteStreamResponse) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[6]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use FinalizeWriteStreamResponse.ProtoReflect.Descriptor instead.
func (*FinalizeWriteStreamResponse) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{6}
}

// GetRowCount returns RowCount, or 0 when x is nil.
func (x *FinalizeWriteStreamResponse) GetRowCount() int64 {
        if x == nil {
                return 0
        }
        return x.RowCount
}

// BatchCommitWriteStreamsRequest names a parent table and the write streams
// under it that should be committed together.
type BatchCommitWriteStreamsRequest struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // Parent table that owns the streams.
        Parent string `protobuf:"bytes,1,opt,name=parent,proto3" json:"parent,omitempty"`
        // Stream names to commit atomically. Reserved for the deferred
        // follow-up; the emulator returns UNIMPLEMENTED.
        WriteStreams  []string `protobuf:"bytes,2,rep,name=write_streams,json=writeStreams,proto3" json:"write_streams,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero BatchCommitWriteStreamsRequest and
// re-attaches its message info.
func (x *BatchCommitWriteStreamsRequest) Reset() {
        *x = BatchCommitWriteStreamsRequest{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[7])
}

// String returns the protobuf runtime's string rendering of x.
func (x *BatchCommitWriteStreamsRequest) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks BatchCommitWriteStreamsRequest as a protobuf message type.
func (*BatchCommitWriteStreamsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *BatchCommitWriteStreamsRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[7]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use BatchCommitWriteStreamsRequest.ProtoReflect.Descriptor instead.
func (*BatchCommitWriteStreamsRequest) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{7}
}

// GetParent returns Parent, or "" when x is nil.
func (x *BatchCommitWriteStreamsRequest) GetParent() string {
        if x == nil {
                return ""
        }
        return x.Parent
}

// GetWriteStreams returns WriteStreams (the stored slice, not a copy), or nil
// when x is nil.
func (x *BatchCommitWriteStreamsRequest) GetWriteStreams() []string {
        if x == nil {
                return nil
        }
        return x.WriteStreams
}

// BatchCommitWriteStreamsResponse reports when a batch commit landed.
type BatchCommitWriteStreamsResponse struct {
        state protoimpl.MessageState `protogen:"open.v1"`
        // RFC3339 timestamp the commit landed. Reserved for the deferred
        // follow-up.
        CommitTime    string `protobuf:"bytes,1,opt,name=commit_time,json=commitTime,proto3" json:"commit_time,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites x with the zero BatchCommitWriteStreamsResponse and
// re-attaches its message info.
func (x *BatchCommitWriteStreamsResponse) Reset() {
        *x = BatchCommitWriteStreamsResponse{}
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(&file_storage_write_proto_msgTypes[8])
}

// String returns the protobuf runtime's string rendering of x.
func (x *BatchCommitWriteStreamsResponse) String() string {
        return protoimpl.X.MessageStringOf(x)
}

// ProtoMessage marks BatchCommitWriteStreamsResponse as a protobuf message type.
func (*BatchCommitWriteStreamsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of x. For a non-nil receiver the
// message info is stored on first use; a nil receiver is handed to the
// message info's MessageOf.
func (x *BatchCommitWriteStreamsResponse) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[8]
        if x == nil {
                return info.MessageOf(x)
        }
        msgState := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if msgState.LoadMessageInfo() == nil {
                msgState.StoreMessageInfo(info)
        }
        return msgState
}

// Deprecated: Use BatchCommitWriteStreamsResponse.ProtoReflect.Descriptor instead.
func (*BatchCommitWriteStreamsResponse) Descriptor() ([]byte, []int) {
        return file_storage_write_proto_rawDescGZIP(), []int{8}
}

// GetCommitTime returns CommitTime, or "" when x is nil.
func (x *BatchCommitWriteStreamsResponse) GetCommitTime() string {
        if x == nil {
                return ""
        }
        return x.CommitTime
}

// FlushRowsRequest is the request message of the FlushRows RPC. It
// names a write stream and an offset.
type FlushRowsRequest struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        WriteStream   string                 `protobuf:"bytes,1,opt,name=write_stream,json=writeStream,proto3" json:"write_stream,omitempty"`
        Offset        int64                  `protobuf:"varint,2,opt,name=offset,proto3" json:"offset,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the message with its zero value and stores the
// message info on the fresh message state.
func (x *FlushRowsRequest) Reset() {
        *x = FlushRowsRequest{}
        info := &file_storage_write_proto_msgTypes[9]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl's MessageStringOf.
func (x *FlushRowsRequest) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*FlushRowsRequest) ProtoMessage() {}

// ProtoReflect returns the reflective view of the message. A nil
// receiver is delegated to the message info's MessageOf.
func (x *FlushRowsRequest) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[9]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                // No message info recorded yet; store it before returning.
                state.StoreMessageInfo(info)
        }
        return state
}

// Deprecated: Use FlushRowsRequest.ProtoReflect.Descriptor instead.
func (*FlushRowsRequest) Descriptor() ([]byte, []int) {
        indexPath := []int{9}
        return file_storage_write_proto_rawDescGZIP(), indexPath
}

// GetWriteStream returns the WriteStream field, or the empty string
// when the receiver is nil.
func (x *FlushRowsRequest) GetWriteStream() string {
        if x == nil {
                return ""
        }
        return x.WriteStream
}

// GetOffset returns the Offset field, or 0 when the receiver is nil.
func (x *FlushRowsRequest) GetOffset() int64 {
        if x == nil {
                return 0
        }
        return x.Offset
}

// FlushRowsResponse is the reply message of the FlushRows RPC; its
// only payload field is an offset.
type FlushRowsResponse struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Offset        int64                  `protobuf:"varint,1,opt,name=offset,proto3" json:"offset,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the message with its zero value and stores the
// message info on the fresh message state.
func (x *FlushRowsResponse) Reset() {
        *x = FlushRowsResponse{}
        info := &file_storage_write_proto_msgTypes[10]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl's MessageStringOf.
func (x *FlushRowsResponse) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*FlushRowsResponse) ProtoMessage() {}

// ProtoReflect returns the reflective view of the message. A nil
// receiver is delegated to the message info's MessageOf.
func (x *FlushRowsResponse) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[10]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                // No message info recorded yet; store it before returning.
                state.StoreMessageInfo(info)
        }
        return state
}

// Deprecated: Use FlushRowsResponse.ProtoReflect.Descriptor instead.
func (*FlushRowsResponse) Descriptor() ([]byte, []int) {
        indexPath := []int{10}
        return file_storage_write_proto_rawDescGZIP(), indexPath
}

// GetOffset returns the Offset field, or 0 when the receiver is nil.
func (x *FlushRowsResponse) GetOffset() int64 {
        if x == nil {
                return 0
        }
        return x.Offset
}

// AppendRowsRequest_ProtoData carries the rows plus the writer's
// schema. The writer schema is informational today (the table's schema
// is trusted for shape validation); the rows ride on `DataRow` cells,
// the same shape `Catalog.InsertRows` uses.
type AppendRowsRequest_ProtoData struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        WriterSchema  *TableSchema           `protobuf:"bytes,1,opt,name=writer_schema,json=writerSchema,proto3" json:"writer_schema,omitempty"`
        Rows          []*DataRow             `protobuf:"bytes,2,rep,name=rows,proto3" json:"rows,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the message with its zero value and stores the
// message info on the fresh message state.
func (x *AppendRowsRequest_ProtoData) Reset() {
        *x = AppendRowsRequest_ProtoData{}
        info := &file_storage_write_proto_msgTypes[11]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl's MessageStringOf.
func (x *AppendRowsRequest_ProtoData) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*AppendRowsRequest_ProtoData) ProtoMessage() {}

// ProtoReflect returns the reflective view of the message. A nil
// receiver is delegated to the message info's MessageOf.
func (x *AppendRowsRequest_ProtoData) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[11]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                // No message info recorded yet; store it before returning.
                state.StoreMessageInfo(info)
        }
        return state
}

// Deprecated: Use AppendRowsRequest_ProtoData.ProtoReflect.Descriptor instead.
func (*AppendRowsRequest_ProtoData) Descriptor() ([]byte, []int) {
        indexPath := []int{2, 0}
        return file_storage_write_proto_rawDescGZIP(), indexPath
}

// GetWriterSchema returns the WriterSchema field, or nil when the
// receiver is nil.
func (x *AppendRowsRequest_ProtoData) GetWriterSchema() *TableSchema {
        if x == nil {
                return nil
        }
        return x.WriterSchema
}

// GetRows returns the Rows field, or nil when the receiver is nil.
func (x *AppendRowsRequest_ProtoData) GetRows() []*DataRow {
        if x == nil {
                return nil
        }
        return x.Rows
}

// AppendRowsResponse_AppendResult is the success envelope. It carries
// the offset of the first row in the batch; on a `_default` /
// `COMMITTED` stream that is just `prior_offset + 0`, because every
// append commits in arrival order.
type AppendRowsResponse_AppendResult struct {
        state         protoimpl.MessageState `protogen:"open.v1"`
        Offset        int64                  `protobuf:"varint,1,opt,name=offset,proto3" json:"offset,omitempty"`
        unknownFields protoimpl.UnknownFields
        sizeCache     protoimpl.SizeCache
}

// Reset overwrites the message with its zero value and stores the
// message info on the fresh message state.
func (x *AppendRowsResponse_AppendResult) Reset() {
        *x = AppendRowsResponse_AppendResult{}
        info := &file_storage_write_proto_msgTypes[12]
        protoimpl.X.MessageStateOf(protoimpl.Pointer(x)).StoreMessageInfo(info)
}

// String returns the text produced by protoimpl's MessageStringOf.
func (x *AppendRowsResponse_AppendResult) String() string {
        text := protoimpl.X.MessageStringOf(x)
        return text
}

// ProtoMessage is an empty marker method.
func (*AppendRowsResponse_AppendResult) ProtoMessage() {}

// ProtoReflect returns the reflective view of the message. A nil
// receiver is delegated to the message info's MessageOf.
func (x *AppendRowsResponse_AppendResult) ProtoReflect() protoreflect.Message {
        info := &file_storage_write_proto_msgTypes[12]
        if x == nil {
                return info.MessageOf(x)
        }
        state := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
        if state.LoadMessageInfo() == nil {
                // No message info recorded yet; store it before returning.
                state.StoreMessageInfo(info)
        }
        return state
}

// Deprecated: Use AppendRowsResponse_AppendResult.ProtoReflect.Descriptor instead.
func (*AppendRowsResponse_AppendResult) Descriptor() ([]byte, []int) {
        indexPath := []int{3, 0}
        return file_storage_write_proto_rawDescGZIP(), indexPath
}

// GetOffset returns the Offset field, or 0 when the receiver is nil.
func (x *AppendRowsResponse_AppendResult) GetOffset() int64 {
        if x == nil {
                return 0
        }
        return x.Offset
}

// File_storage_write_proto is the file descriptor for storage_write.proto.
// It is nil until file_storage_write_proto_init assigns the built
// descriptor to it.
var File_storage_write_proto protoreflect.FileDescriptor

// file_storage_write_proto_rawDesc holds the raw descriptor bytes for
// storage_write.proto as a string constant. It is handed to
// protoimpl.DescBuilder as RawDescriptor and gzip-compressed on demand
// by file_storage_write_proto_rawDescGZIP. The bytes are generated
// output and must not be edited by hand.
const file_storage_write_proto_rawDesc = "" +
        "\n" +
        "\x13storage_write.proto\x12\x14bigquery_emulator.v1\x1a\x0eemulator.proto\"\x81\x02\n" +
        "\vWriteStream\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\x12:\n" +
        "\x04type\x18\x02 \x01(\x0e2&.bigquery_emulator.v1.WriteStream.TypeR\x04type\x129\n" +
        "\x06schema\x18\x03 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\x06schema\x12\x1f\n" +
        "\vcreate_time\x18\x04 \x01(\tR\n" +
        "createTime\"F\n" +
        "\x04Type\x12\x14\n" +
        "\x10TYPE_UNSPECIFIED\x10\x00\x12\r\n" +
        "\tCOMMITTED\x10\x01\x12\v\n" +
        "\aPENDING\x10\x02\x12\f\n" +
        "\bBUFFERED\x10\x03\"x\n" +
        "\x18CreateWriteStreamRequest\x12\x16\n" +
        "\x06parent\x18\x01 \x01(\tR\x06parent\x12D\n" +
        "\fwrite_stream\x18\x02 \x01(\v2!.bigquery_emulator.v1.WriteStreamR\vwriteStream\"\xc4\x02\n" +
        "\x11AppendRowsRequest\x12!\n" +
        "\fwrite_stream\x18\x01 \x01(\tR\vwriteStream\x12\x16\n" +
        "\x06offset\x18\x02 \x01(\x03R\x06offset\x12P\n" +
        "\n" +
        "proto_rows\x18\x04 \x01(\v21.bigquery_emulator.v1.AppendRowsRequest.ProtoDataR\tprotoRows\x12\x19\n" +
        "\btrace_id\x18\x06 \x01(\tR\atraceId\x1a\x86\x01\n" +
        "\tProtoData\x12F\n" +
        "\rwriter_schema\x18\x01 \x01(\v2!.bigquery_emulator.v1.TableSchemaR\fwriterSchema\x121\n" +
        "\x04rows\x18\x02 \x03(\v2\x1d.bigquery_emulator.v1.DataRowR\x04rows\"\x85\x02\n" +
        "\x12AppendRowsResponse\x12\\\n" +
        "\rappend_result\x18\x01 \x01(\v25.bigquery_emulator.v1.AppendRowsResponse.AppendResultH\x00R\fappendResult\x12%\n" +
        "\rerror_message\x18\x02 \x01(\tH\x00R\ferrorMessage\x12\x19\n" +
        "\btrace_id\x18\x03 \x01(\tR\atraceId\x12\x1b\n" +
        "\trow_count\x18\x04 \x01(\x03R\browCount\x1a&\n" +
        "\fAppendResult\x12\x16\n" +
        "\x06offset\x18\x01 \x01(\x03R\x06offsetB\n" +
        "\n" +
        "\bresponse\"+\n" +
        "\x15GetWriteStreamRequest\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\"0\n" +
        "\x1aFinalizeWriteStreamRequest\x12\x12\n" +
        "\x04name\x18\x01 \x01(\tR\x04name\":\n" +
        "\x1bFinalizeWriteStreamResponse\x12\x1b\n" +
        "\trow_count\x18\x01 \x01(\x03R\browCount\"]\n" +
        "\x1eBatchCommitWriteStreamsRequest\x12\x16\n" +
        "\x06parent\x18\x01 \x01(\tR\x06parent\x12#\n" +
        "\rwrite_streams\x18\x02 \x03(\tR\fwriteStreams\"B\n" +
        "\x1fBatchCommitWriteStreamsResponse\x12\x1f\n" +
        "\vcommit_time\x18\x01 \x01(\tR\n" +
        "commitTime\"M\n" +
        "\x10FlushRowsRequest\x12!\n" +
        "\fwrite_stream\x18\x01 \x01(\tR\vwriteStream\x12\x16\n" +
        "\x06offset\x18\x02 \x01(\x03R\x06offset\"+\n" +
        "\x11FlushRowsResponse\x12\x16\n" +
        "\x06offset\x18\x01 \x01(\x03R\x06offset2\xa0\x05\n" +
        "\fStorageWrite\x12f\n" +
        "\x11CreateWriteStream\x12..bigquery_emulator.v1.CreateWriteStreamRequest\x1a!.bigquery_emulator.v1.WriteStream\x12c\n" +
        "\n" +
        "AppendRows\x12'.bigquery_emulator.v1.AppendRowsRequest\x1a(.bigquery_emulator.v1.AppendRowsResponse(\x010\x01\x12`\n" +
        "\x0eGetWriteStream\x12+.bigquery_emulator.v1.GetWriteStreamRequest\x1a!.bigquery_emulator.v1.WriteStream\x12z\n" +
        "\x13FinalizeWriteStream\x120.bigquery_emulator.v1.FinalizeWriteStreamRequest\x1a1.bigquery_emulator.v1.FinalizeWriteStreamResponse\x12\x86\x01\n" +
        "\x17BatchCommitWriteStreams\x124.bigquery_emulator.v1.BatchCommitWriteStreamsRequest\x1a5.bigquery_emulator.v1.BatchCommitWriteStreamsResponse\x12\\\n" +
        "\tFlushRows\x12&.bigquery_emulator.v1.FlushRowsRequest\x1a'.bigquery_emulator.v1.FlushRowsResponseBFZAgithub.com/vantaboard/bigquery-emulator/gateway/enginepb;enginepb\xf8\x01\x01b\x06proto3"

// Cache for the gzip-compressed descriptor: the sync.Once guards the
// single compression run in file_storage_write_proto_rawDescGZIP and
// the byte slice holds its result.
var (
        file_storage_write_proto_rawDescOnce sync.Once
        file_storage_write_proto_rawDescData []byte
)

func file_storage_write_proto_rawDescGZIP() []byte {
        file_storage_write_proto_rawDescOnce.Do(func() {
                file_storage_write_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_storage_write_proto_rawDesc), len(file_storage_write_proto_rawDesc)))
        })
        return file_storage_write_proto_rawDescData
}

// Per-file tables passed to protoimpl.TypeBuilder as EnumInfos and
// MessageInfos: one enum slot and thirteen message slots.
var (
        file_storage_write_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
        file_storage_write_proto_msgTypes  = make([]protoimpl.MessageInfo, 13)
)
// file_storage_write_proto_goTypes lists the Go types passed to
// protoimpl.TypeBuilder as GoTypes. The trailing comment on each entry
// gives its index and proto full name; file_storage_write_proto_depIdxs
// refers to entries by those indices. The variable is set to nil once
// file_storage_write_proto_init has built the file.
var file_storage_write_proto_goTypes = []any{
        (WriteStream_Type)(0),                   // 0: bigquery_emulator.v1.WriteStream.Type
        (*WriteStream)(nil),                     // 1: bigquery_emulator.v1.WriteStream
        (*CreateWriteStreamRequest)(nil),        // 2: bigquery_emulator.v1.CreateWriteStreamRequest
        (*AppendRowsRequest)(nil),               // 3: bigquery_emulator.v1.AppendRowsRequest
        (*AppendRowsResponse)(nil),              // 4: bigquery_emulator.v1.AppendRowsResponse
        (*GetWriteStreamRequest)(nil),           // 5: bigquery_emulator.v1.GetWriteStreamRequest
        (*FinalizeWriteStreamRequest)(nil),      // 6: bigquery_emulator.v1.FinalizeWriteStreamRequest
        (*FinalizeWriteStreamResponse)(nil),     // 7: bigquery_emulator.v1.FinalizeWriteStreamResponse
        (*BatchCommitWriteStreamsRequest)(nil),  // 8: bigquery_emulator.v1.BatchCommitWriteStreamsRequest
        (*BatchCommitWriteStreamsResponse)(nil), // 9: bigquery_emulator.v1.BatchCommitWriteStreamsResponse
        (*FlushRowsRequest)(nil),                // 10: bigquery_emulator.v1.FlushRowsRequest
        (*FlushRowsResponse)(nil),               // 11: bigquery_emulator.v1.FlushRowsResponse
        (*AppendRowsRequest_ProtoData)(nil),     // 12: bigquery_emulator.v1.AppendRowsRequest.ProtoData
        (*AppendRowsResponse_AppendResult)(nil), // 13: bigquery_emulator.v1.AppendRowsResponse.AppendResult
        (*TableSchema)(nil),                     // 14: bigquery_emulator.v1.TableSchema
        (*DataRow)(nil),                         // 15: bigquery_emulator.v1.DataRow
}
// file_storage_write_proto_depIdxs is passed to protoimpl.TypeBuilder as
// DependencyIndexes. Per the trailing comments, each leading entry maps
// a field type reference or a method input/output type to an index in
// file_storage_write_proto_goTypes, and the final five entries give the
// start offsets of the sub-lists within this slice. The variable is set
// to nil once file_storage_write_proto_init has built the file.
var file_storage_write_proto_depIdxs = []int32{
        0,  // 0: bigquery_emulator.v1.WriteStream.type:type_name -> bigquery_emulator.v1.WriteStream.Type
        14, // 1: bigquery_emulator.v1.WriteStream.schema:type_name -> bigquery_emulator.v1.TableSchema
        1,  // 2: bigquery_emulator.v1.CreateWriteStreamRequest.write_stream:type_name -> bigquery_emulator.v1.WriteStream
        12, // 3: bigquery_emulator.v1.AppendRowsRequest.proto_rows:type_name -> bigquery_emulator.v1.AppendRowsRequest.ProtoData
        13, // 4: bigquery_emulator.v1.AppendRowsResponse.append_result:type_name -> bigquery_emulator.v1.AppendRowsResponse.AppendResult
        14, // 5: bigquery_emulator.v1.AppendRowsRequest.ProtoData.writer_schema:type_name -> bigquery_emulator.v1.TableSchema
        15, // 6: bigquery_emulator.v1.AppendRowsRequest.ProtoData.rows:type_name -> bigquery_emulator.v1.DataRow
        2,  // 7: bigquery_emulator.v1.StorageWrite.CreateWriteStream:input_type -> bigquery_emulator.v1.CreateWriteStreamRequest
        3,  // 8: bigquery_emulator.v1.StorageWrite.AppendRows:input_type -> bigquery_emulator.v1.AppendRowsRequest
        5,  // 9: bigquery_emulator.v1.StorageWrite.GetWriteStream:input_type -> bigquery_emulator.v1.GetWriteStreamRequest
        6,  // 10: bigquery_emulator.v1.StorageWrite.FinalizeWriteStream:input_type -> bigquery_emulator.v1.FinalizeWriteStreamRequest
        8,  // 11: bigquery_emulator.v1.StorageWrite.BatchCommitWriteStreams:input_type -> bigquery_emulator.v1.BatchCommitWriteStreamsRequest
        10, // 12: bigquery_emulator.v1.StorageWrite.FlushRows:input_type -> bigquery_emulator.v1.FlushRowsRequest
        1,  // 13: bigquery_emulator.v1.StorageWrite.CreateWriteStream:output_type -> bigquery_emulator.v1.WriteStream
        4,  // 14: bigquery_emulator.v1.StorageWrite.AppendRows:output_type -> bigquery_emulator.v1.AppendRowsResponse
        1,  // 15: bigquery_emulator.v1.StorageWrite.GetWriteStream:output_type -> bigquery_emulator.v1.WriteStream
        7,  // 16: bigquery_emulator.v1.StorageWrite.FinalizeWriteStream:output_type -> bigquery_emulator.v1.FinalizeWriteStreamResponse
        9,  // 17: bigquery_emulator.v1.StorageWrite.BatchCommitWriteStreams:output_type -> bigquery_emulator.v1.BatchCommitWriteStreamsResponse
        11, // 18: bigquery_emulator.v1.StorageWrite.FlushRows:output_type -> bigquery_emulator.v1.FlushRowsResponse
        13, // [13:19] is the sub-list for method output_type
        7,  // [7:13] is the sub-list for method input_type
        7,  // [7:7] is the sub-list for extension type_name
        7,  // [7:7] is the sub-list for extension extendee
        0,  // [0:7] is the sub-list for field type_name
}

// init builds the storage_write.proto descriptors when the package is
// loaded.
func init() {
        file_storage_write_proto_init()
}
// file_storage_write_proto_init builds the descriptors and type tables
// for storage_write.proto and publishes the result in
// File_storage_write_proto. It returns immediately when that variable is
// already set, so repeated calls do no further work.
func file_storage_write_proto_init() {
        if File_storage_write_proto != nil {
                // Already built.
                return
        }
        // emulator.proto is initialized first; storage_write.proto
        // references its TableSchema and DataRow types.
        file_emulator_proto_init()
        // Message 3 (AppendRowsResponse) has a oneof; register its wrappers.
        file_storage_write_proto_msgTypes[3].OneofWrappers = []any{
                (*AppendRowsResponse_AppendResult_)(nil),
                (*AppendRowsResponse_ErrorMessage)(nil),
        }
        // pkgAnchor exists only so reflection can report this package's path.
        type pkgAnchor struct{}
        rawDescriptor := unsafe.Slice(unsafe.StringData(file_storage_write_proto_rawDesc), len(file_storage_write_proto_rawDesc))
        descBuilder := protoimpl.DescBuilder{
                GoPackagePath: reflect.TypeOf(pkgAnchor{}).PkgPath(),
                RawDescriptor: rawDescriptor,
                NumEnums:      1,
                NumMessages:   13,
                NumExtensions: 0,
                NumServices:   1,
        }
        built := protoimpl.TypeBuilder{
                File:              descBuilder,
                GoTypes:           file_storage_write_proto_goTypes,
                DependencyIndexes: file_storage_write_proto_depIdxs,
                EnumInfos:         file_storage_write_proto_enumTypes,
                MessageInfos:      file_storage_write_proto_msgTypes,
        }.Build()
        File_storage_write_proto = built.File
        // The build inputs are cleared once the file has been built.
        file_storage_write_proto_goTypes = nil
        file_storage_write_proto_depIdxs = nil
}

// storage_write.proto is the internal contract for the BigQuery Storage
// Write API surface (BQ public name: `google.cloud.bigquery.storage.v1`).
// The Go gateway translates REST `tabledata.insertAll` calls (and the
// gRPC AppendRows path used directly by the Storage Write client
// libraries) into RPCs against this service so the C++ engine can
// commit rows through the same `DuckDBStorage::AppendRows` primitive
// the local DML executor already uses.
//
// Storage Read/Write API handlers for `_default`, `COMMITTED`, `BUFFERED`,
// and `PENDING` stream types ship end-to-end; see `ROADMAP.md` for the
// posture matrix.
//
// Code generation:
//   - Go:  `task proto:gen` writes
//          `gateway/enginepb/storage_write.{pb,grpc.pb}.go`. Same
//          plugin pipeline as `emulator.proto`.
//   - C++: Bazel's `cc_proto_library` + `cc_grpc_library` rules in
//          `proto/BUILD.bazel` emit
//          `storage_write.{pb,grpc.pb}.{h,cc}` into bazel-bin.

// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
// versions:
// - protoc-gen-go-grpc v1.6.2
// - protoc             (unknown)
// source: storage_write.proto

package enginepb

import (
        context "context"
        grpc "google.golang.org/grpc"
        codes "google.golang.org/grpc/codes"
        status "google.golang.org/grpc/status"
)

// This is a compile-time assertion to ensure that this generated file
// is compatible with the grpc package it is being compiled against.
// Requires gRPC-Go v1.64.0 or later.
// The constant is bound to the blank identifier: only the reference to
// grpc.SupportPackageIsVersion9 matters, so compilation fails if the
// linked grpc package does not define it.
const _ = grpc.SupportPackageIsVersion9

// Fully-qualified gRPC method names of the StorageWrite service, one per
// RPC, in "/<package>.<service>/<method>" form. The client stubs pass
// them to Invoke / NewStream and the unary server handlers report them
// in grpc.UnaryServerInfo.FullMethod.
const (
        StorageWrite_CreateWriteStream_FullMethodName       = "/bigquery_emulator.v1.StorageWrite/CreateWriteStream"
        StorageWrite_AppendRows_FullMethodName              = "/bigquery_emulator.v1.StorageWrite/AppendRows"
        StorageWrite_GetWriteStream_FullMethodName          = "/bigquery_emulator.v1.StorageWrite/GetWriteStream"
        StorageWrite_FinalizeWriteStream_FullMethodName     = "/bigquery_emulator.v1.StorageWrite/FinalizeWriteStream"
        StorageWrite_BatchCommitWriteStreams_FullMethodName = "/bigquery_emulator.v1.StorageWrite/BatchCommitWriteStreams"
        StorageWrite_FlushRows_FullMethodName               = "/bigquery_emulator.v1.StorageWrite/FlushRows"
)

// StorageWriteClient is the client API for StorageWrite service.
// Obtain an implementation with NewStorageWriteClient.
//
// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
type StorageWriteClient interface {
        // CreateWriteStream returns a stream handle the caller binds to
        // `AppendRows`. The emulator supports `_default` and `COMMITTED`
        // streams (every flushed batch becomes immediately visible to
        // readers); `BUFFERED` / `PENDING` requests fail with UNIMPLEMENTED
        // until the deferred follow-up lands.
        CreateWriteStream(ctx context.Context, in *CreateWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error)
        // AppendRows is bidirectional-streaming. The first message on a
        // stream MUST set `write_stream`; subsequent messages may leave it
        // empty (the handler keeps the binding from the first message) or
        // re-assert the same name. The handler commits each `proto_rows`
        // batch through `Storage::AppendRows` synchronously and replies with
        // one `AppendRowsResponse` per request before reading the next.
        // The returned stream sends AppendRowsRequest messages and receives
        // AppendRowsResponse messages.
        AppendRows(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[AppendRowsRequest, AppendRowsResponse], error)
        // GetWriteStream returns the stream metadata the engine recorded at
        // CreateWriteStream time. Used by clients to verify the stream
        // type / schema before opening an AppendRows session.
        GetWriteStream(ctx context.Context, in *GetWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error)
        // FinalizeWriteStream marks a stream as closed. The emulator
        // returns UNIMPLEMENTED today; the surface is reserved for the
        // deferred BUFFERED / PENDING follow-up where finalize is the
        // producer's signal to the BatchCommitWriteStreams pass.
        FinalizeWriteStream(ctx context.Context, in *FinalizeWriteStreamRequest, opts ...grpc.CallOption) (*FinalizeWriteStreamResponse, error)
        // BatchCommitWriteStreams atomically commits a set of `PENDING`
        // streams. The emulator returns UNIMPLEMENTED today; reserved for
        // the deferred follow-up.
        BatchCommitWriteStreams(ctx context.Context, in *BatchCommitWriteStreamsRequest, opts ...grpc.CallOption) (*BatchCommitWriteStreamsResponse, error)
        // FlushRows advances the visibility offset on a `BUFFERED` stream.
        // The emulator returns UNIMPLEMENTED today; reserved for the
        // deferred follow-up.
        FlushRows(ctx context.Context, in *FlushRowsRequest, opts ...grpc.CallOption) (*FlushRowsResponse, error)
}

// storageWriteClient is the StorageWriteClient implementation returned
// by NewStorageWriteClient; every RPC goes through the wrapped
// connection.
type storageWriteClient struct {
        cc grpc.ClientConnInterface
}

// NewStorageWriteClient returns a StorageWriteClient that issues its
// RPCs over cc.
func NewStorageWriteClient(cc grpc.ClientConnInterface) StorageWriteClient {
        client := &storageWriteClient{cc: cc}
        return client
}

// CreateWriteStream performs the unary CreateWriteStream RPC. The
// caller's options are appended after grpc.StaticMethod().
func (c *storageWriteClient) CreateWriteStream(ctx context.Context, in *CreateWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        reply := new(WriteStream)
        if err := c.cc.Invoke(ctx, StorageWrite_CreateWriteStream_FullMethodName, in, reply, callOpts...); err != nil {
                return nil, err
        }
        return reply, nil
}

// AppendRows opens the bidirectional AppendRows stream, described by the
// first stream entry of StorageWrite_ServiceDesc, and wraps it in a
// typed generic client stream.
func (c *storageWriteClient) AppendRows(ctx context.Context, opts ...grpc.CallOption) (grpc.BidiStreamingClient[AppendRowsRequest, AppendRowsResponse], error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        raw, err := c.cc.NewStream(ctx, &StorageWrite_ServiceDesc.Streams[0], StorageWrite_AppendRows_FullMethodName, callOpts...)
        if err != nil {
                return nil, err
        }
        return &grpc.GenericClientStream[AppendRowsRequest, AppendRowsResponse]{ClientStream: raw}, nil
}

// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type StorageWrite_AppendRowsClient = grpc.BidiStreamingClient[AppendRowsRequest, AppendRowsResponse]

// GetWriteStream performs the unary GetWriteStream RPC. The caller's
// options are appended after grpc.StaticMethod().
func (c *storageWriteClient) GetWriteStream(ctx context.Context, in *GetWriteStreamRequest, opts ...grpc.CallOption) (*WriteStream, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        reply := new(WriteStream)
        if err := c.cc.Invoke(ctx, StorageWrite_GetWriteStream_FullMethodName, in, reply, callOpts...); err != nil {
                return nil, err
        }
        return reply, nil
}

// FinalizeWriteStream performs the unary FinalizeWriteStream RPC. The
// caller's options are appended after grpc.StaticMethod().
func (c *storageWriteClient) FinalizeWriteStream(ctx context.Context, in *FinalizeWriteStreamRequest, opts ...grpc.CallOption) (*FinalizeWriteStreamResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        reply := new(FinalizeWriteStreamResponse)
        if err := c.cc.Invoke(ctx, StorageWrite_FinalizeWriteStream_FullMethodName, in, reply, callOpts...); err != nil {
                return nil, err
        }
        return reply, nil
}

// BatchCommitWriteStreams performs the unary BatchCommitWriteStreams
// RPC. The caller's options are appended after grpc.StaticMethod().
func (c *storageWriteClient) BatchCommitWriteStreams(ctx context.Context, in *BatchCommitWriteStreamsRequest, opts ...grpc.CallOption) (*BatchCommitWriteStreamsResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        reply := new(BatchCommitWriteStreamsResponse)
        if err := c.cc.Invoke(ctx, StorageWrite_BatchCommitWriteStreams_FullMethodName, in, reply, callOpts...); err != nil {
                return nil, err
        }
        return reply, nil
}

// FlushRows performs the unary FlushRows RPC. The caller's options are
// appended after grpc.StaticMethod().
func (c *storageWriteClient) FlushRows(ctx context.Context, in *FlushRowsRequest, opts ...grpc.CallOption) (*FlushRowsResponse, error) {
        callOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
        reply := new(FlushRowsResponse)
        if err := c.cc.Invoke(ctx, StorageWrite_FlushRows_FullMethodName, in, reply, callOpts...); err != nil {
                return nil, err
        }
        return reply, nil
}

// StorageWriteServer is the server API for StorageWrite service.
// All implementations should embed UnimplementedStorageWriteServer
// for forward compatibility.
// Register an implementation with RegisterStorageWriteServer.
type StorageWriteServer interface {
        // CreateWriteStream returns a stream handle the caller binds to
        // `AppendRows`. The emulator supports `_default` and `COMMITTED`
        // streams (every flushed batch becomes immediately visible to
        // readers); `BUFFERED` / `PENDING` requests fail with UNIMPLEMENTED
        // until the deferred follow-up lands.
        CreateWriteStream(context.Context, *CreateWriteStreamRequest) (*WriteStream, error)
        // AppendRows is bidirectional-streaming. The first message on a
        // stream MUST set `write_stream`; subsequent messages may leave it
        // empty (the handler keeps the binding from the first message) or
        // re-assert the same name. The handler commits each `proto_rows`
        // batch through `Storage::AppendRows` synchronously and replies with
        // one `AppendRowsResponse` per request before reading the next.
        // The stream argument receives AppendRowsRequest messages and sends
        // AppendRowsResponse messages.
        AppendRows(grpc.BidiStreamingServer[AppendRowsRequest, AppendRowsResponse]) error
        // GetWriteStream returns the stream metadata the engine recorded at
        // CreateWriteStream time. Used by clients to verify the stream
        // type / schema before opening an AppendRows session.
        GetWriteStream(context.Context, *GetWriteStreamRequest) (*WriteStream, error)
        // FinalizeWriteStream marks a stream as closed. The emulator
        // returns UNIMPLEMENTED today; the surface is reserved for the
        // deferred BUFFERED / PENDING follow-up where finalize is the
        // producer's signal to the BatchCommitWriteStreams pass.
        FinalizeWriteStream(context.Context, *FinalizeWriteStreamRequest) (*FinalizeWriteStreamResponse, error)
        // BatchCommitWriteStreams atomically commits a set of `PENDING`
        // streams. The emulator returns UNIMPLEMENTED today; reserved for
        // the deferred follow-up.
        BatchCommitWriteStreams(context.Context, *BatchCommitWriteStreamsRequest) (*BatchCommitWriteStreamsResponse, error)
        // FlushRows advances the visibility offset on a `BUFFERED` stream.
        // The emulator returns UNIMPLEMENTED today; reserved for the
        // deferred follow-up.
        FlushRows(context.Context, *FlushRowsRequest) (*FlushRowsResponse, error)
}

// UnimplementedStorageWriteServer should be embedded to have
// forward compatible implementations. Each of its RPC methods returns a
// codes.Unimplemented status error naming the method.
//
// NOTE: this should be embedded by value instead of pointer to avoid a nil
// pointer dereference when methods are called.
type UnimplementedStorageWriteServer struct{}

// CreateWriteStream always fails with codes.Unimplemented.
func (UnimplementedStorageWriteServer) CreateWriteStream(context.Context, *CreateWriteStreamRequest) (*WriteStream, error) {
        const msg = "method CreateWriteStream not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}

// AppendRows always fails with codes.Unimplemented.
func (UnimplementedStorageWriteServer) AppendRows(grpc.BidiStreamingServer[AppendRowsRequest, AppendRowsResponse]) error {
        const msg = "method AppendRows not implemented"
        return status.Error(codes.Unimplemented, msg)
}

// GetWriteStream always fails with codes.Unimplemented.
func (UnimplementedStorageWriteServer) GetWriteStream(context.Context, *GetWriteStreamRequest) (*WriteStream, error) {
        const msg = "method GetWriteStream not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}

// FinalizeWriteStream always fails with codes.Unimplemented.
func (UnimplementedStorageWriteServer) FinalizeWriteStream(context.Context, *FinalizeWriteStreamRequest) (*FinalizeWriteStreamResponse, error) {
        const msg = "method FinalizeWriteStream not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}

// BatchCommitWriteStreams always fails with codes.Unimplemented.
func (UnimplementedStorageWriteServer) BatchCommitWriteStreams(context.Context, *BatchCommitWriteStreamsRequest) (*BatchCommitWriteStreamsResponse, error) {
        const msg = "method BatchCommitWriteStreams not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}

// FlushRows always fails with codes.Unimplemented.
func (UnimplementedStorageWriteServer) FlushRows(context.Context, *FlushRowsRequest) (*FlushRowsResponse, error) {
        const msg = "method FlushRows not implemented"
        return nil, status.Error(codes.Unimplemented, msg)
}

// testEmbeddedByValue is the no-op probe RegisterStorageWriteServer
// calls to detect a nil embedded pointer at registration time.
func (UnimplementedStorageWriteServer) testEmbeddedByValue() {}

// UnsafeStorageWriteServer may be embedded to opt out of forward compatibility for this service.
// Use of this interface is not recommended, as added methods to StorageWriteServer will
// result in compilation errors.
// Its only member is the unexported marker method
// mustEmbedUnimplementedStorageWriteServer.
type UnsafeStorageWriteServer interface {
        mustEmbedUnimplementedStorageWriteServer()
}

// RegisterStorageWriteServer registers srv with s under
// StorageWrite_ServiceDesc.
func RegisterStorageWriteServer(s grpc.ServiceRegistrar, srv StorageWriteServer) {
        // embedProbe matches servers that expose testEmbeddedByValue.
        type embedProbe interface{ testEmbeddedByValue() }

        // If the following call panics, it indicates UnimplementedStorageWriteServer was
        // embedded by pointer and is nil.  This will cause panics if an
        // unimplemented method is ever invoked, so we test this at initialization
        // time to prevent it from happening at runtime later due to I/O.
        if probe, ok := srv.(embedProbe); ok {
                probe.testEmbeddedByValue()
        }
        s.RegisterService(&StorageWrite_ServiceDesc, srv)
}

// _StorageWrite_CreateWriteStream_Handler is the unary server handler
// for CreateWriteStream. It decodes the request with dec, then calls the
// StorageWriteServer directly when no interceptor is set, or through the
// interceptor otherwise. A decode error is returned unchanged.
func _StorageWrite_CreateWriteStream_Handler(srv any, ctx context.Context, dec func(any) error, interceptor grpc.UnaryServerInterceptor) (any, error) {
        req := new(CreateWriteStreamRequest)
        if err := dec(req); err != nil {
                return nil, err
        }
        if interceptor == nil {
                return srv.(StorageWriteServer).CreateWriteStream(ctx, req)
        }
        // terminal is what the interceptor eventually calls to reach the server.
        terminal := func(ctx context.Context, msg any) (any, error) {
                return srv.(StorageWriteServer).CreateWriteStream(ctx, msg.(*CreateWriteStreamRequest))
        }
        callInfo := &grpc.UnaryServerInfo{
                Server:     srv,
                FullMethod: StorageWrite_CreateWriteStream_FullMethodName,
        }
        return interceptor(ctx, req, callInfo, terminal)
}

// _StorageWrite_AppendRows_Handler is the streaming server handler for
// AppendRows. It wraps the raw server stream in a typed generic stream
// and passes it to the StorageWriteServer implementation.
func _StorageWrite_AppendRows_Handler(srv any, stream grpc.ServerStream) error {
        typed := &grpc.GenericServerStream[AppendRowsRequest, AppendRowsResponse]{ServerStream: stream}
        return srv.(StorageWriteServer).AppendRows(typed)
}

// StorageWrite_AppendRowsServer is the server-side AppendRows stream type.
// This type alias is provided for backwards compatibility with existing code that references the prior non-generic stream type by name.
type StorageWrite_AppendRowsServer = grpc.BidiStreamingServer[AppendRowsRequest, AppendRowsResponse]

// _StorageWrite_GetWriteStream_Handler is the unary server handler for
// GetWriteStream. It decodes the request with dec, then calls the
// StorageWriteServer directly when no interceptor is set, or through the
// interceptor otherwise. A decode error is returned unchanged.
func _StorageWrite_GetWriteStream_Handler(srv any, ctx context.Context, dec func(any) error, interceptor grpc.UnaryServerInterceptor) (any, error) {
        req := new(GetWriteStreamRequest)
        if err := dec(req); err != nil {
                return nil, err
        }
        if interceptor == nil {
                return srv.(StorageWriteServer).GetWriteStream(ctx, req)
        }
        // terminal is what the interceptor eventually calls to reach the server.
        terminal := func(ctx context.Context, msg any) (any, error) {
                return srv.(StorageWriteServer).GetWriteStream(ctx, msg.(*GetWriteStreamRequest))
        }
        callInfo := &grpc.UnaryServerInfo{
                Server:     srv,
                FullMethod: StorageWrite_GetWriteStream_FullMethodName,
        }
        return interceptor(ctx, req, callInfo, terminal)
}

// _StorageWrite_FinalizeWriteStream_Handler is the unary handler registered
// for the FinalizeWriteStream method. It decodes the request via dec and
// invokes srv's FinalizeWriteStream, going through interceptor when one is
// supplied.
func _StorageWrite_FinalizeWriteStream_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        request := new(FinalizeWriteStreamRequest)
        if decodeErr := dec(request); decodeErr != nil {
                return nil, decodeErr
        }
        // dispatch performs the real call; type assertions happen only when it runs.
        dispatch := func(ctx context.Context, msg interface{}) (interface{}, error) {
                return srv.(StorageWriteServer).FinalizeWriteStream(ctx, msg.(*FinalizeWriteStreamRequest))
        }
        if interceptor != nil {
                return interceptor(ctx, request, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: StorageWrite_FinalizeWriteStream_FullMethodName,
                }, dispatch)
        }
        return dispatch(ctx, request)
}

// _StorageWrite_BatchCommitWriteStreams_Handler is the unary handler
// registered for the BatchCommitWriteStreams method. It decodes the request
// via dec and invokes srv's BatchCommitWriteStreams, going through interceptor
// when one is supplied.
func _StorageWrite_BatchCommitWriteStreams_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        request := new(BatchCommitWriteStreamsRequest)
        if decodeErr := dec(request); decodeErr != nil {
                return nil, decodeErr
        }
        // dispatch performs the real call; type assertions happen only when it runs.
        dispatch := func(ctx context.Context, msg interface{}) (interface{}, error) {
                return srv.(StorageWriteServer).BatchCommitWriteStreams(ctx, msg.(*BatchCommitWriteStreamsRequest))
        }
        if interceptor != nil {
                return interceptor(ctx, request, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: StorageWrite_BatchCommitWriteStreams_FullMethodName,
                }, dispatch)
        }
        return dispatch(ctx, request)
}

// _StorageWrite_FlushRows_Handler is the unary handler registered for the
// FlushRows method. It decodes the request via dec and invokes srv's
// FlushRows, going through interceptor when one is supplied.
func _StorageWrite_FlushRows_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
        request := new(FlushRowsRequest)
        if decodeErr := dec(request); decodeErr != nil {
                return nil, decodeErr
        }
        // dispatch performs the real call; type assertions happen only when it runs.
        dispatch := func(ctx context.Context, msg interface{}) (interface{}, error) {
                return srv.(StorageWriteServer).FlushRows(ctx, msg.(*FlushRowsRequest))
        }
        if interceptor != nil {
                return interceptor(ctx, request, &grpc.UnaryServerInfo{
                        Server:     srv,
                        FullMethod: StorageWrite_FlushRows_FullMethodName,
                }, dispatch)
        }
        return dispatch(ctx, request)
}

// StorageWrite_ServiceDesc is the grpc.ServiceDesc for StorageWrite service.
// It's only intended for direct use with grpc.RegisterService,
// and not to be introspected or modified (even as a copy)
//
// It lists five unary methods and one stream (AppendRows) that is flagged as
// both client- and server-streaming.
var StorageWrite_ServiceDesc = grpc.ServiceDesc{
        ServiceName: "bigquery_emulator.v1.StorageWrite",
        HandlerType: (*StorageWriteServer)(nil),
        // Unary RPCs, each bound to its handler function.
        Methods: []grpc.MethodDesc{
                {
                        MethodName: "CreateWriteStream",
                        Handler:    _StorageWrite_CreateWriteStream_Handler,
                },
                {
                        MethodName: "GetWriteStream",
                        Handler:    _StorageWrite_GetWriteStream_Handler,
                },
                {
                        MethodName: "FinalizeWriteStream",
                        Handler:    _StorageWrite_FinalizeWriteStream_Handler,
                },
                {
                        MethodName: "BatchCommitWriteStreams",
                        Handler:    _StorageWrite_BatchCommitWriteStreams_Handler,
                },
                {
                        MethodName: "FlushRows",
                        Handler:    _StorageWrite_FlushRows_Handler,
                },
        },
        // Streaming RPCs; AppendRows streams in both directions.
        Streams: []grpc.StreamDesc{
                {
                        StreamName:    "AppendRows",
                        Handler:       _StorageWrite_AppendRows_Handler,
                        ServerStreams: true,
                        ClientStreams: true,
                },
        },
        Metadata: "storage_write.proto",
}

// Package connectionfixture seeds EXTERNAL_QUERY snapshot files under
// $data_dir/external/connections/<conn_id>/.
package connectionfixture

import (
        "encoding/json"
        "errors"
        "os"
        "path/filepath"
        "strings"
)

// ManifestEntry maps a query string or alias to a result filename.
// Query and Alias are dropped from the serialized form when empty; Result is
// always emitted.
type ManifestEntry struct {
        // Query is the query text this entry matches.
        Query  string `json:"query,omitempty" yaml:"query,omitempty"`
        // Alias is an alternative key for the entry.
        Alias  string `json:"alias,omitempty" yaml:"alias,omitempty"`
        // Result is the filename of the result file — presumably relative to
        // the connection directory; verify against the reader.
        Result string `json:"result"          yaml:"result"`
}

// Manifest is the on-disk queries.yaml / queries.json shape: a single
// top-level "queries" list of entries.
type Manifest struct {
        // Queries holds one entry per query/alias-to-result mapping.
        Queries []ManifestEntry `json:"queries" yaml:"queries"`
}

// ResultFile is schema + rows for one EXTERNAL_QUERY snapshot.
type ResultFile struct {
        // Schema lists the output columns.
        Schema []Column         `json:"schema" yaml:"schema"`
        // Rows holds one map per row — presumably keyed by column name; verify
        // against the reader.
        Rows   []map[string]any `json:"rows"   yaml:"rows"`
}

// Column is one output field in a fixture result.
type Column struct {
        // Name is the column name.
        Name string `json:"name" yaml:"name"`
        // Type is the column type as a string — presumably a BigQuery type
        // name; verify against the reader.
        Type string `json:"type" yaml:"type"`
}

// CopyTree mirrors every non-directory entry found under srcDir into
// dataDir/external/connections/connID/, keeping each file's path relative to
// srcDir.
//
// All three arguments are required. Directories are created with mode 0o750
// and files are written with mode 0o600. The walk stops at the first error,
// which is returned unchanged.
func CopyTree(dataDir, connID, srcDir string) error {
        if dataDir == "" || connID == "" || srcDir == "" {
                return errors.New("dataDir, connID, and srcDir are required")
        }
        destRoot := filepath.Join(dataDir, "external", "connections", connID)
        if err := os.MkdirAll(destRoot, 0o750); err != nil {
                return err
        }
        copyEntry := func(src string, entry os.DirEntry, walkErr error) error {
                switch {
                case walkErr != nil:
                        return walkErr
                case entry.IsDir():
                        // Directories are materialized lazily, when a file inside them is copied.
                        return nil
                }
                relPath, err := filepath.Rel(srcDir, src)
                if err != nil {
                        return err
                }
                target := filepath.Join(destRoot, relPath)
                if !isPathWithin(target, destRoot) {
                        return errors.New("fixture path escapes destination directory")
                }
                if err = os.MkdirAll(filepath.Dir(target), 0o750); err != nil {
                        return err
                }
                contents, err := os.ReadFile(src) //nolint:gosec // fixture path under srcDir
                if err != nil {
                        return err
                }
                //nolint:gosec // target validated under destRoot
                return os.WriteFile(target, contents, 0o600)
        }
        return filepath.WalkDir(srcDir, copyEntry)
}

// WriteInline materializes manifest + one result file under
// dataDir/external/connections/connID/.
//
// The manifest is written as queries.json and the result as resultName
// (default "result.json"), both as indented JSON with mode 0o600.
//
// resultName must be a local path: names that are absolute or that climb out
// of the connection directory (for example "../x") are rejected, matching the
// containment guarantee CopyTree gives for copied fixtures. Both payloads are
// marshalled before anything is written, so a marshalling failure leaves the
// directory contents untouched. If the second write fails the manifest file
// is left in place.
func WriteInline(dataDir, connID string, manifest Manifest, resultName string, result ResultFile) error {
        if dataDir == "" || connID == "" {
                return errors.New("dataDir and connID are required")
        }
        if resultName == "" {
                resultName = "result.json"
        }
        // Refuse names that would place the result outside the connection directory.
        if !filepath.IsLocal(resultName) {
                return errors.New("result name escapes destination directory")
        }
        manifestRaw, marshalErr := json.MarshalIndent(manifest, "", "  ")
        if marshalErr != nil {
                return marshalErr
        }
        resultRaw, resultMarshalErr := json.MarshalIndent(result, "", "  ")
        if resultMarshalErr != nil {
                return resultMarshalErr
        }
        root := filepath.Join(dataDir, "external", "connections", connID)
        if err := os.MkdirAll(root, 0o750); err != nil {
                return err
        }
        if writeErr := os.WriteFile(filepath.Join(root, "queries.json"), manifestRaw, 0o600); writeErr != nil {
                return writeErr
        }
        return os.WriteFile(filepath.Join(root, resultName), resultRaw, 0o600)
}

// isPathWithin reports whether path, once both arguments are made absolute,
// is root itself or lies somewhere beneath it. Any failure to resolve or
// relate the two paths is treated as "not within".
func isPathWithin(path, root string) bool {
        resolvedPath, pathErr := filepath.Abs(path)
        if pathErr != nil {
                return false
        }
        resolvedRoot, rootErr := filepath.Abs(root)
        if rootErr != nil {
                return false
        }
        relative, relErr := filepath.Rel(resolvedRoot, resolvedPath)
        if relErr != nil {
                return false
        }
        // A relative path that is ".." or starts with "../" climbs out of root.
        if relative == ".." {
                return false
        }
        parentPrefix := ".." + string(filepath.Separator)
        return !strings.HasPrefix(relative, parentPrefix)
}

// Package external materializes BigQuery external tables into the
// engine catalog by fetching GCS (fake-gcs), Google Sheets (fixture/live),
// or local snapshot sources and bulk-inserting parsed rows.
package external

import (
        "context"
        "errors"
        "fmt"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
        "github.com/vantaboard/bigquery-emulator/gateway/load"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
)

// TempDatasetID is the internal dataset that ephemeral tableDefinitions are
// registered under when the query omits defaultDataset. The gateway
// sets this as default_dataset_id on the engine QueryRequest so that
// unqualified table ids in SQL resolve.
const TempDatasetID = "_bq_external_temp"

// Materialize fetches external source bytes, parses them, and registers the
// destination table with its rows in the engine catalog, replacing any table
// already present at target (WRITE_TRUNCATE).
//
// It is MaterializeWith called with a nil resolver.
func Materialize(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        target Target,
        cfg *bqtypes.ExternalDataConfiguration,
) error {
        var defaultResolver *Resolver // nil: no per-source configuration
        return MaterializeWith(ctx, catalog, target, cfg, defaultResolver)
}

// MaterializeWith materializes using an optional per-source resolver.
//
// It rejects a nil catalog or cfg, validates cfg, and requires target to name
// a project, dataset, and table. The schema comes from target.Schema when set
// and from cfg.Schema otherwise. When cfg.CsvOptions is present, its leading
// row count is passed on to the parser.
func MaterializeWith(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        target Target,
        cfg *bqtypes.ExternalDataConfiguration,
        resolver *Resolver,
) error {
        switch {
        case catalog == nil:
                return errors.New("external: nil CatalogClient")
        case cfg == nil:
                return errors.New("external: externalDataConfiguration is required")
        }
        if err := validateExternalConfig(cfg); err != nil {
                return err
        }
        if target.ProjectID == "" || target.DatasetID == "" || target.TableID == "" {
                return errors.New("external: project, dataset, and table id are required")
        }

        // The enclosing table's schema takes precedence over the config's.
        effectiveSchema := cfg.Schema
        if target.Schema != nil {
                effectiveSchema = target.Schema
        }
        var leadingRows int
        if csv := cfg.CsvOptions; csv != nil {
                leadingRows = csv.SkipLeadingRows()
        }

        rows, err := fetchAndParse(ctx, resolver, cfg, effectiveSchema, leadingRows)
        if err != nil {
                return err
        }
        return registerParsedRows(ctx, catalog, target, effectiveSchema, rows)
}

// Target names a catalog table to materialize. MaterializeWith requires
// ProjectID, DatasetID, and TableID to be non-empty.
type Target struct {
        ProjectID string
        DatasetID string
        TableID   string
        // Schema from the enclosing Table resource; wins over config.Schema
        // when both are set (permanent external table inserts). May be nil,
        // in which case the config's schema is used.
        Schema *bqtypes.TableSchema
}

// registerParsedRows replaces the catalog table named by target with one that
// holds parsed.Rows.
//
// The table schema is parsed.Schema, or fallbackSchema when parsed.Schema is
// nil; a schema without fields is an error. The dataset is ensured first, an
// existing table is dropped, and then the table is registered and its rows
// inserted. Catalog failures are wrapped with the failing step's name.
func registerParsedRows(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        target Target,
        fallbackSchema *bqtypes.TableSchema,
        parsed load.ParsedRows,
) error {
        wireSchema := schemaToProto(parsed.Schema)
        if wireSchema == nil {
                wireSchema = schemaToProto(fallbackSchema)
        }
        if wireSchema == nil || len(wireSchema.GetFields()) == 0 {
                return errors.New("external table requires schema or autodetect=true for CSV")
        }

        if err := ensureDataset(ctx, catalog, target.ProjectID, target.DatasetID); err != nil {
                return err
        }

        engineRef := &enginepb.TableRef{
                ProjectId: target.ProjectID,
                DatasetId: target.DatasetID,
                TableId:   target.TableID,
        }
        // Drop first so the registration below starts from an empty table.
        if tableExists(ctx, catalog, engineRef) {
                _, dropErr := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: engineRef})
                if dropErr != nil {
                        return fmt.Errorf("external drop table: %w", dropErr)
                }
        }

        registerReq := &enginepb.RegisterTableRequest{
                Table:  engineRef,
                Schema: wireSchema,
        }
        if _, registerErr := catalog.RegisterTable(ctx, registerReq); registerErr != nil {
                return fmt.Errorf("external register table: %w", registerErr)
        }

        seedRef := seed.TableRef{
                ProjectID: target.ProjectID,
                DatasetID: target.DatasetID,
                TableID:   target.TableID,
        }
        _, insertErr := seed.NewCatalogApplier(catalog).InsertRows(ctx, seedRef, wireSchema, parsed.Rows)
        if insertErr != nil {
                return fmt.Errorf("external insert rows: %w", insertErr)
        }
        return nil
}

// PrepareTableDefinitions materializes every ephemeral external table in
// defs and returns the default dataset id the caller should forward to the
// engine when the query omitted defaultDataset.
//
// It is PrepareTableDefinitionsWith called with a nil resolver.
func PrepareTableDefinitions(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        projectID string,
        defs map[string]bqtypes.ExternalDataConfiguration,
        defaultDataset string,
) (string, error) {
        var defaultResolver *Resolver // nil: no per-source configuration
        return PrepareTableDefinitionsWith(ctx, catalog, projectID, defs, defaultDataset, defaultResolver)
}

// PrepareTableDefinitionsWith materializes defs with an optional resolver.
//
// Each map key is used as the table id. Tables are registered under
// defaultDataset, or under TempDatasetID when defaultDataset is empty, and the
// dataset id actually used is returned. With no defs, defaultDataset is
// returned unchanged. The first materialization error aborts the loop and is
// returned with an empty dataset id.
func PrepareTableDefinitionsWith(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        projectID string,
        defs map[string]bqtypes.ExternalDataConfiguration,
        defaultDataset string,
        resolver *Resolver,
) (string, error) {
        if len(defs) == 0 {
                return defaultDataset, nil
        }
        datasetID := defaultDataset
        if datasetID == "" {
                datasetID = TempDatasetID
        }
        for tableID := range defs {
                // Work on a copy so the pointer handed down never aliases the map value.
                def := defs[tableID]
                target := Target{
                        ProjectID: projectID,
                        DatasetID: datasetID,
                        TableID:   tableID,
                        Schema:    def.Schema,
                }
                if err := MaterializeWith(ctx, catalog, target, &def, resolver); err != nil {
                        return "", err
                }
        }
        return datasetID, nil
}

// isGoogleSheets reports whether cfg describes a Google Sheets source: the
// source format is GOOGLE_SHEETS (case-insensitive, surrounding space
// ignored), GoogleSheetsOptions is set, or any source URI contains
// "docs.google.com/spreadsheets".
func isGoogleSheets(cfg *bqtypes.ExternalDataConfiguration) bool {
        format := strings.TrimSpace(cfg.SourceFormat)
        if strings.EqualFold(format, "GOOGLE_SHEETS") || cfg.GoogleSheetsOptions != nil {
                return true
        }
        for i := range cfg.SourceURIs {
                if strings.Contains(cfg.SourceURIs[i], "docs.google.com/spreadsheets") {
                        return true
                }
        }
        return false
}

// validateExternalConfig checks that cfg names at least one source URI and
// that none of the URIs point at an unsupported backend.
//
// Google Sheets configs only need a source URI. Every other config is
// rejected when a URI is an Azure Blob or Google Drive URI, and Bigtable
// configs additionally have each URI validated with ValidateBigtableURI.
func validateExternalConfig(cfg *bqtypes.ExternalDataConfiguration) error {
        sheets := isGoogleSheets(cfg)
        if len(cfg.SourceURIs) == 0 {
                if sheets {
                        return errors.New("google sheets external table requires sourceUri")
                }
                return errors.New("external table requires at least one sourceUri")
        }
        if sheets {
                return nil
        }
        for _, uri := range cfg.SourceURIs {
                switch {
                case IsAzureBlobURI(uri):
                        return UnsupportedAzureBlobError()
                case IsGoogleDriveURI(uri):
                        return UnsupportedDriveError()
                }
        }
        if !isBigtable(cfg) {
                return nil
        }
        for _, uri := range cfg.SourceURIs {
                if err := ValidateBigtableURI(uri); err != nil {
                        return err
                }
        }
        return nil
}

// parseBigtableExternal produces the parsed form of a Bigtable external
// table: always zero rows, with the supplied schema when it has fields and a
// single STRING "rowkey" column otherwise. The returned error is always nil.
func parseBigtableExternal(schema *bqtypes.TableSchema) (load.ParsedRows, error) {
        result := load.ParsedRows{Schema: schema, Rows: []map[string]any{}}
        if schema == nil || len(schema.Fields) == 0 {
                result.Schema = &bqtypes.TableSchema{
                        Fields: []bqtypes.TableFieldSchema{{Name: "rowkey", Type: "STRING"}},
                }
        }
        return result, nil
}

// fetchAndParse picks the source-specific loader for cfg: Google Sheets
// first, then Bigtable, and otherwise the GCS loader. Only the parsed rows
// and error of the GCS loader are kept; its other results are discarded.
func fetchAndParse(
        ctx context.Context,
        resolver *Resolver,
        cfg *bqtypes.ExternalDataConfiguration,
        schema *bqtypes.TableSchema,
        skipLeading int,
) (load.ParsedRows, error) {
        switch {
        case isGoogleSheets(cfg):
                return parseSheetsExternal(ctx, resolver, cfg, schema, skipLeading)
        case isBigtable(cfg):
                return parseBigtableExternal(schema)
        default:
                rows, _, _, err := load.ParseExternalGCS(ctx, cfg, schema, skipLeading)
                return rows, err
        }
}

// ensureDataset asks a catalog applier to ensure projectID.datasetID exists,
// passing "US" as the location argument, and returns only the error.
func ensureDataset(ctx context.Context, catalog enginepb.CatalogClient, projectID, datasetID string) error {
        if _, err := seed.NewCatalogApplier(catalog).EnsureDataset(ctx, projectID, datasetID, "US"); err != nil {
                return err
        }
        return nil
}

// tableExists reports whether DescribeTable succeeds for ref. Any error,
// whatever its cause, is reported as "does not exist".
func tableExists(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef) bool {
        if _, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref}); err != nil {
                return false
        }
        return true
}

// schemaToProto converts a REST-shaped table schema into its engine proto
// form. A nil schema yields nil; a schema without fields yields a proto
// schema with an empty, non-nil field list.
func schemaToProto(s *bqtypes.TableSchema) *enginepb.TableSchema {
        if s == nil {
                return nil
        }
        converted := make([]*enginepb.FieldSchema, len(s.Fields))
        for idx, field := range s.Fields {
                converted[idx] = fieldToProto(field)
        }
        return &enginepb.TableSchema{Fields: converted}
}

// fieldToProto converts one field, copying its name, type, mode, and
// description and recursing into nested fields. A field without children
// keeps a nil child list.
func fieldToProto(f bqtypes.TableFieldSchema) *enginepb.FieldSchema {
        var children []*enginepb.FieldSchema
        for _, child := range f.Fields {
                children = append(children, fieldToProto(child))
        }
        return &enginepb.FieldSchema{
                Name:        f.Name,
                Type:        f.Type,
                Mode:        f.Mode,
                Description: f.Description,
                Fields:      children,
        }
}

// LoadSourceConfig loads the external source resolution rules for dataDir by
// delegating to sourceconfig.Load and returning its result unchanged.
func LoadSourceConfig(dataDir string) (*sourceconfig.Config, error) {
        cfg, err := sourceconfig.Load(dataDir)
        return cfg, err
}

package external

import (
        "context"
        _ "embed"
        "errors"
        "fmt"
        "io"
        "net/http"
        "net/url"
        "os"
        "path/filepath"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
        "github.com/vantaboard/bigquery-emulator/gateway/load"
)

// Public sample sheet (Example Spreadsheet, Class Data tab) used for fixture
// and opt-in live conformance tests.
const (
        // ClassDataSheetDocID is the sample sheet's document id; fixture mode
        // answers it from the embedded CSV.
        ClassDataSheetDocID = "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms"
        // classDataSheetGID is the gid query value used for live CSV exports.
        classDataSheetGID   = "0"
)

// classDataFixtureCSV holds the embedded CSV snapshot that is returned for
// ClassDataSheetDocID in fixture mode.
//
//go:embed fixtures/google_sheets/class_data.csv
var classDataFixtureCSV []byte

// Resolver carries per-source configuration for materialization.
// Its methods tolerate a nil *Resolver and a nil cfg, falling back to
// defaults in both cases.
type Resolver struct {
        // cfg is the loaded source configuration; may be nil.
        cfg *sourceconfig.Config
}

// NewResolver wraps cfg in a Resolver. A nil cfg is allowed and makes the
// resolver use package defaults.
func NewResolver(cfg *sourceconfig.Config) *Resolver {
        resolver := new(Resolver)
        resolver.cfg = cfg
        return resolver
}

// modeSheets returns the resolution mode for the given Sheets doc id. Without
// a resolver or configuration it defaults to fixture mode.
func (r *Resolver) modeSheets(docID string) sourceconfig.Mode {
        if r != nil && r.cfg != nil {
                return r.cfg.ResolveGoogleSheets(docID)
        }
        return sourceconfig.ModeFixture
}

// fixturePath builds <fixture root>/google_sheets/<docID>/<name>. It returns
// "" when there is no resolver or configuration.
func (r *Resolver) fixturePath(docID, name string) string {
        if r != nil && r.cfg != nil {
                return filepath.Join(r.cfg.FixtureRoot(), "google_sheets", docID, name)
        }
        return ""
}

// fetchGoogleSheetsCSV returns the CSV bytes for the sheet named by the first
// source URI of cfg. The doc id is extracted from that URI and the resolver's
// mode decides the source: live fetches over HTTP, local is rejected, and
// every other mode reads a fixture snapshot.
func fetchGoogleSheetsCSV(
        ctx context.Context,
        r *Resolver,
        cfg *bqtypes.ExternalDataConfiguration,
) ([]byte, error) {
        if len(cfg.SourceURIs) == 0 {
                return nil, errors.New("google sheets external table requires sourceUri")
        }
        firstURI := cfg.SourceURIs[0]
        docID := sourceconfig.ExtractSheetDocID(firstURI)
        if docID == "" {
                return nil, fmt.Errorf("could not parse Google Sheets doc id from %q", firstURI)
        }
        mode := r.modeSheets(docID)
        if mode == sourceconfig.ModeLive {
                return fetchLiveSheetsCSV(ctx, docID, cfg.GoogleSheetsOptions)
        }
        if mode == sourceconfig.ModeLocal {
                return nil, errors.New("google sheets local mode is not supported; use fixture or live")
        }
        return loadFixtureSheetsCSV(r, docID)
}

// loadFixtureSheetsCSV returns the fixture snapshot for docID. The public
// sample sheet is served from the embedded CSV. For any other doc, when the
// resolver has a data dir, the files data.csv, class_data.csv, and sheet.csv
// are tried in that order and the first readable one wins. Read failures are
// skipped silently; if nothing is found an error naming docID is returned.
func loadFixtureSheetsCSV(r *Resolver, docID string) ([]byte, error) {
        if docID == ClassDataSheetDocID {
                return classDataFixtureCSV, nil
        }
        var candidates []string
        if r != nil && r.cfg != nil && r.cfg.DataDir != "" {
                candidates = []string{"data.csv", "class_data.csv", "sheet.csv"}
        }
        for _, candidate := range candidates {
                contents, err := os.ReadFile(r.fixturePath(docID, candidate)) //nolint:gosec // operator data dir
                if err != nil {
                        continue
                }
                return contents, nil
        }
        return nil, fmt.Errorf("no fixture snapshot for Google Sheets doc %s", docID)
}

// fetchLiveSheetsCSV downloads docID as CSV from the docs.google.com export
// endpoint using http.DefaultClient and returns the response body.
//
// opts is accepted but not consulted: a range such as "Class Data!A1:F31" is
// ignored and the export always targets gid classDataSheetGID. A non-200
// response becomes an error carrying the status code and up to 512 bytes of
// the body.
func fetchLiveSheetsCSV(
        ctx context.Context,
        docID string,
        opts *bqtypes.GoogleSheetsOptions,
) ([]byte, error) {
        _ = opts // live export uses gid; public sample uses gid 0.
        target := fmt.Sprintf(
                "https://docs.google.com/spreadsheets/d/%s/export?format=csv&gid=%s",
                url.PathEscape(docID), url.QueryEscape(classDataSheetGID))
        request, err := http.NewRequestWithContext(ctx, http.MethodGet, target, nil)
        if err != nil {
                return nil, err
        }
        response, err := http.DefaultClient.Do(request)
        if err != nil {
                return nil, fmt.Errorf("fetch google sheets %s: %w", docID, err)
        }
        defer func() { _ = response.Body.Close() }()
        if response.StatusCode == http.StatusOK {
                return io.ReadAll(response.Body)
        }
        // Keep only a short snippet of the error body for the message.
        snippet, _ := io.ReadAll(io.LimitReader(response.Body, 512))
        return nil, fmt.Errorf("fetch google sheets %s: HTTP %d: %s",
                docID, response.StatusCode, strings.TrimSpace(string(snippet)))
}

// parseSheetsExternal fetches the sheet's CSV and parses it as a "CSV"
// source.
//
// The number of leading rows to skip is the positive value from
// GoogleSheetsOptions when present; otherwise skipLeading, with 0 replaced
// by 1.
func parseSheetsExternal(
        ctx context.Context,
        r *Resolver,
        cfg *bqtypes.ExternalDataConfiguration,
        schema *bqtypes.TableSchema,
        skipLeading int,
) (load.ParsedRows, error) {
        raw, err := fetchGoogleSheetsCSV(ctx, r, cfg)
        if err != nil {
                return load.ParsedRows{}, err
        }
        rowsToSkip := skipLeading
        switch {
        case cfg.GoogleSheetsOptions != nil && cfg.GoogleSheetsOptions.SkipLeadingRows() > 0:
                rowsToSkip = cfg.GoogleSheetsOptions.SkipLeadingRows()
        case rowsToSkip == 0:
                rowsToSkip = 1
        }
        return load.ParseSource("CSV", raw, schema, rowsToSkip, cfg.Autodetect)
}

// Package sourceconfig resolves external data sources to fixture, local, or
// live modes. Defaults favor deterministic offline behavior; live upstream
// fetches are strictly opt-in per source kind or identity.
package sourceconfig

import (
        "errors"
        "os"
        "path/filepath"
        "strings"

        "gopkg.in/yaml.v3"
)

// Mode is how an external source identity is resolved at fetch time.
// The recognized values are ModeFixture, ModeLocal, and ModeLive; parseMode
// maps anything else to the empty Mode.
type Mode string

const (
        // ModeFixture reads committed snapshots under DataDir/external/fixtures/.
        ModeFixture Mode = "fixture"
        // ModeLocal uses fake-gcs (STORAGE_EMULATOR_HOST) or local file paths.
        ModeLocal Mode = "local"
        // ModeLive reaches a real upstream (credentials / network gated).
        ModeLive Mode = "live"
)

// Kind classifies an external source for default-mode lookup.
type Kind string

const (
        // KindGCS is the kind whose default ResolveGCS falls back to.
        KindGCS          Kind = "gcs"
        // KindGoogleSheets is the kind whose default ResolveGoogleSheets falls back to.
        KindGoogleSheets Kind = "google_sheets"
        // KindConnection is the kind whose default ResolveConnection falls back to.
        KindConnection   Kind = "connection"
)

// Config holds per-source resolution rules loaded from DataDir and env vars.
// The Resolve* and *Root methods accept a nil *Config.
type Config struct {
        // DataDir is the data directory passed to Load; may be empty.
        DataDir string
        // defaults by kind; overridden per source id in sources.
        defaults map[Kind]Mode
        // sources maps a normalized source id (see normalizeID) to its mode.
        sources  map[string]Mode
}

// fileShape is the YAML layout of external_sources.yaml.
type fileShape struct {
        // Defaults maps a kind name to a mode name.
        Defaults map[string]string      `yaml:"defaults"`
        // Sources maps a source id to its per-source entry.
        Sources  map[string]sourceEntry `yaml:"sources"`
}

// sourceEntry is one per-source override in external_sources.yaml.
type sourceEntry struct {
        // Kind is parsed from the file; loadYAMLFile does not read it.
        Kind string `yaml:"kind"`
        // Mode is the mode name applied to this source id.
        Mode string `yaml:"mode"`
}

// Load builds a Config from dataDir and the optional external_sources.yaml
// inside it. When dataDir is empty, only env-based overrides apply.
//
// Built-in defaults are local for GCS and fixture for Google Sheets and
// connections. The YAML file is applied on top of them, and environment
// overrides are applied last.
func Load(dataDir string) (*Config, error) {
        cfg := new(Config)
        cfg.DataDir = dataDir
        cfg.sources = make(map[string]Mode)
        cfg.defaults = map[Kind]Mode{
                KindGCS:          ModeLocal,
                KindGoogleSheets: ModeFixture,
                KindConnection:   ModeFixture,
        }
        if dataDir != "" {
                if err := cfg.loadYAMLFile(dataDir); err != nil {
                        return nil, err
                }
        }
        cfg.applyEnvOverrides()
        return cfg, nil
}

// loadYAMLFile merges dataDir/external_sources.yaml into c. A missing file is
// not an error. Entries whose mode is not recognized are skipped; source ids
// are normalized before being stored.
func (c *Config) loadYAMLFile(dataDir string) error {
        raw, err := os.ReadFile(filepath.Join(dataDir, "external_sources.yaml")) //nolint:gosec // operator-controlled data dir
        switch {
        case errors.Is(err, os.ErrNotExist):
                return nil
        case err != nil:
                return err
        }
        var parsed fileShape
        if err = yaml.Unmarshal(raw, &parsed); err != nil {
                return err
        }
        for kindName, modeName := range parsed.Defaults {
                mode := parseMode(modeName)
                if mode == "" {
                        continue
                }
                c.defaults[parseKind(kindName)] = mode
        }
        for sourceID, entry := range parsed.Sources {
                mode := parseMode(entry.Mode)
                if mode == "" {
                        continue
                }
                c.sources[normalizeID(sourceID)] = mode
        }
        return nil
}

// applyEnvOverrides adjusts the per-kind defaults from the environment:
// a truthy BIGQUERY_EMULATOR_LIVE_SHEETS switches Google Sheets to live, and
// BIGQUERY_EMULATOR_EXTERNAL_GCS_MODE /
// BIGQUERY_EMULATOR_EXTERNAL_CONNECTIONS_MODE set the GCS and connection
// defaults when they hold a recognized mode name.
func (c *Config) applyEnvOverrides() {
        if truthy(os.Getenv("BIGQUERY_EMULATOR_LIVE_SHEETS")) {
                c.defaults[KindGoogleSheets] = ModeLive
        }
        modeVars := []struct {
                env  string
                kind Kind
        }{
                {env: "BIGQUERY_EMULATOR_EXTERNAL_GCS_MODE", kind: KindGCS},
                {env: "BIGQUERY_EMULATOR_EXTERNAL_CONNECTIONS_MODE", kind: KindConnection},
        }
        for _, mv := range modeVars {
                // parseMode trims its input and yields "" for blank or unknown values.
                if mode := parseMode(os.Getenv(mv.env)); mode != "" {
                        c.defaults[mv.kind] = mode
                }
        }
}

// truthy reports whether s, trimmed and lower-cased, is one of "1", "true",
// "yes", or "on".
func truthy(s string) bool {
        normalized := strings.ToLower(strings.TrimSpace(s))
        return normalized == "1" ||
                normalized == "true" ||
                normalized == "yes" ||
                normalized == "on"
}

// parseMode converts s, trimmed and lower-cased, into one of the known modes.
// Anything else yields the empty Mode.
func parseMode(s string) Mode {
        switch candidate := Mode(strings.ToLower(strings.TrimSpace(s))); candidate {
        case ModeFixture, ModeLocal, ModeLive:
                return candidate
        default:
                return ""
        }
}

// parseKind maps a kind name, trimmed and lower-cased, onto a known Kind,
// accepting a few aliases. An unrecognized name is returned as Kind(s) with
// the input left exactly as given.
func parseKind(s string) Kind {
        normalized := strings.ToLower(strings.TrimSpace(s))
        if normalized == "gcs" {
                return KindGCS
        }
        if normalized == "google_sheets" || normalized == "googlesheets" || normalized == "sheets" {
                return KindGoogleSheets
        }
        if normalized == "connection" || normalized == "connections" {
                return KindConnection
        }
        return Kind(s)
}

// normalizeID canonicalizes a source id for map lookups by trimming
// surrounding whitespace and lower-casing it.
func normalizeID(id string) string {
        trimmed := strings.TrimSpace(id)
        return strings.ToLower(trimmed)
}

// ResolveGCS returns the mode for a gs:// URI. A per-source entry for the
// full URI wins, then one for the URI's bucket, then the GCS default. A nil
// Config resolves to ModeLocal.
func (c *Config) ResolveGCS(uri string) Mode {
        if c == nil {
                return ModeLocal
        }
        lookupKeys := []string{uri}
        if bucket := gcsBucket(uri); bucket != "" {
                lookupKeys = append(lookupKeys, bucket)
        }
        for _, key := range lookupKeys {
                if mode, found := c.sources[normalizeID(key)]; found {
                        return mode
                }
        }
        return c.defaults[KindGCS]
}

// ResolveGoogleSheets returns the mode for a Sheets doc id or URL. The doc id
// is extracted when possible, and the raw argument is used otherwise. A
// per-source entry wins over the Google Sheets default. A nil Config resolves
// to ModeFixture.
func (c *Config) ResolveGoogleSheets(docOrURL string) Mode {
        if c == nil {
                return ModeFixture
        }
        key := docOrURL
        if docID := ExtractSheetDocID(docOrURL); docID != "" {
                key = docID
        }
        mode, found := c.sources[normalizeID(key)]
        if !found {
                return c.defaults[KindGoogleSheets]
        }
        return mode
}

// ResolveConnection returns the mode for a connection resource name or id.
// The short connection id is looked up first, then the full name, and
// finally the connection default applies. A nil Config resolves to
// ModeFixture.
func (c *Config) ResolveConnection(name string) Mode {
        if c == nil {
                return ModeFixture
        }
        for _, key := range [2]string{connectionID(name), name} {
                if mode, found := c.sources[normalizeID(key)]; found {
                        return mode
                }
        }
        return c.defaults[KindConnection]
}

// FixtureRoot returns DataDir/external/fixtures, the directory for committed
// external snapshots, or "" when c is nil or has no DataDir.
func (c *Config) FixtureRoot() string {
        if c != nil && c.DataDir != "" {
                return filepath.Join(c.DataDir, "external", "fixtures")
        }
        return ""
}

// GCSCacheRoot returns DataDir/external/gcs-cache, the directory where gs://
// objects are materialized for engine LOAD/EXPORT and offline snapshots, or
// "" when c is nil or has no DataDir.
func (c *Config) GCSCacheRoot() string {
        if c != nil && c.DataDir != "" {
                return filepath.Join(c.DataDir, "external", "gcs-cache")
        }
        return ""
}

// ConnectionFixtureRoot returns DataDir/external/connections, the directory
// holding fixture SQL result files for EXTERNAL_QUERY, or "" when c is nil or
// has no DataDir.
func (c *Config) ConnectionFixtureRoot() string {
        if c != nil && c.DataDir != "" {
                return filepath.Join(c.DataDir, "external", "connections")
        }
        return ""
}

// gcsBucket returns the bucket component of a gs:// URI: the text after the
// optional "gs://" prefix up to the first "/". A bucket-only URI such as
// "gs://bucket" yields "bucket", so bucket-level overrides also apply to it.
// It returns "" when the bucket component is empty.
func gcsBucket(uri string) string {
        rest := strings.TrimPrefix(uri, "gs://")
        // Cut returns the whole remainder when there is no "/" at all.
        bucket, _, _ := strings.Cut(rest, "/")
        return bucket
}

// connectionID extracts the short connection id from name after trimming
// whitespace: the text after the last "/" if there is one, otherwise the text
// after the last "." (an EXTERNAL_QUERY connection arg may be region.id),
// otherwise the trimmed name itself.
func connectionID(name string) string {
        trimmed := strings.TrimSpace(name)
        // Separators in priority order: resource path first, then region.id.
        for _, sep := range [2]string{"/", "."} {
                if idx := strings.LastIndex(trimmed, sep); idx >= 0 {
                        return trimmed[idx+1:]
                }
        }
        return trimmed
}

// ExtractSheetDocID parses a Google Sheets URL or bare doc id.
//
// For input containing "/d/", the id is the text after the first "/d/" up to
// the next "/", "#", or "?". Input without any "/" is treated as a bare id
// and returned trimmed. Everything else, including empty input, yields "".
func ExtractSheetDocID(uri string) string {
        trimmed := strings.TrimSpace(uri)
        if trimmed == "" {
                return ""
        }
        _, tail, found := strings.Cut(trimmed, "/d/")
        if !found {
                // Anything with a slash (which covers "scheme://") is not a bare id.
                if strings.Contains(trimmed, "/") {
                        return ""
                }
                return trimmed
        }
        if end := strings.IndexAny(tail, "/#?"); end >= 0 {
                return tail[:end]
        }
        return tail
}

package external

import (
        "errors"
        "fmt"
        "slices"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// sourceFormatBigtable is the sourceFormat value that marks a Bigtable
// external table.
const sourceFormatBigtable = "BIGTABLE"

// IsBigtableSourceFormat reports whether format is BIGTABLE, ignoring case
// and surrounding whitespace.
func IsBigtableSourceFormat(format string) bool {
        trimmed := strings.TrimSpace(format)
        return strings.EqualFold(trimmed, sourceFormatBigtable)
}

// IsBigtableSourceURI reports whether uri looks like a Bigtable REST
// source URI: after trimming whitespace it either starts with
// "https://bigtable.googleapis.com/" or contains
// "googleapis.com/bigtable/".
func IsBigtableSourceURI(uri string) bool {
        trimmed := strings.TrimSpace(uri)
        if strings.HasPrefix(trimmed, "https://bigtable.googleapis.com/") {
                return true
        }
        return strings.Contains(trimmed, "googleapis.com/bigtable/")
}

// ValidateBigtableURI checks that uri has the canonical Bigtable external
// URI shape. It returns an error when IsBigtableSourceURI rejects the
// URI, or when any of the "/projects/", "/instances/" or "/tables/"
// path markers is absent; otherwise it returns nil.
func ValidateBigtableURI(uri string) error {
        if !IsBigtableSourceURI(uri) {
                return fmt.Errorf("invalid Bigtable sourceUri: %q", uri)
        }
        for _, marker := range []string{"/projects/", "/instances/", "/tables/"} {
                if strings.Contains(uri, marker) {
                        continue
                }
                return fmt.Errorf(
                        "invalid Bigtable sourceUri (expected .../projects/P/instances/I/tables/T): %q",
                        uri,
                )
        }
        return nil
}

// IsAzureBlobURI reports whether uri is an Azure Blob / ADLS URI the UI
// may submit. Matching is case-insensitive and ignores surrounding
// whitespace: the URI must start with "azure://" or contain a
// ".blob.core.windows.net/" or ".dfs.core.windows.net/" host segment.
func IsAzureBlobURI(uri string) bool {
        lowered := strings.TrimSpace(strings.ToLower(uri))
        if strings.HasPrefix(lowered, "azure://") {
                return true
        }
        for _, host := range []string{".blob.core.windows.net/", ".dfs.core.windows.net/"} {
                if strings.Contains(lowered, host) {
                        return true
                }
        }
        return false
}

// IsGoogleDriveURI reports whether uri is a non-Sheets Google Drive URI:
// it must contain "drive.google.com/" and must not contain the substring
// "spreadsheets".
func IsGoogleDriveURI(uri string) bool {
        trimmed := strings.TrimSpace(uri)
        if !strings.Contains(trimmed, "drive.google.com/") {
                return false
        }
        return !strings.Contains(trimmed, "spreadsheets")
}

// UnsupportedAzureBlobError builds the error reported for Azure
// external-table URIs. Each call returns a fresh error value.
func UnsupportedAzureBlobError() error {
        const msg = "azure blob storage external tables are not supported in the emulator"
        return errors.New(msg)
}

// UnsupportedDriveError builds the error reported for Google Drive file
// URIs; the message points callers at GOOGLE_SHEETS for spreadsheets.
// Each call returns a fresh error value.
func UnsupportedDriveError() error {
        const msg = "google drive external tables are not supported in the emulator (use GOOGLE_SHEETS for spreadsheets)"
        return errors.New(msg)
}

// isBigtable reports whether cfg describes a Bigtable-backed external
// table: either its SourceFormat is BIGTABLE or at least one of its
// SourceURIs is a Bigtable URI. cfg is dereferenced unconditionally, so
// it must be non-nil.
func isBigtable(cfg *bqtypes.ExternalDataConfiguration) bool {
        if IsBigtableSourceFormat(cfg.SourceFormat) {
                return true
        }
        for _, uri := range cfg.SourceURIs {
                if IsBigtableSourceURI(uri) {
                        return true
                }
        }
        return false
}

// Package extract implements synchronous BigQuery EXTRACT jobs.
package extract

import (
        "bytes"
        "compress/gzip"
        "context"
        "encoding/csv"
        "encoding/json"
        "errors"
        "fmt"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/load"
)

// listPageSize is the MaxResults value listAllRows sends with every
// ListRows request while paging through the source table.
const listPageSize = 10_000

// Result captures extract-job statistics.
//
// InputBytes is the length, in bytes, of the payload Execute uploaded
// (measured after the optional gzip step). DestinationURIFileCounts has
// one entry per destination URI, in request order; Execute sets each
// entry to 1 because it uploads the payload once to every URI.
type Result struct {
        InputBytes               int64
        DestinationURIFileCounts []int64
}

// Execute runs a synchronous EXTRACT job.
//
// It validates cfg, resolves the source table (falling back to
// defaultProject when the table reference carries no project), reads the
// schema and every row through catalog, serializes them in
// cfg.DestinationFormat (CSV when empty), optionally gzips the payload
// per cfg.Compression, and uploads the same payload to each destination
// URI.
//
// On any failure the zero Result is returned with the error. On success
// InputBytes is the size of the uploaded payload (after compression, if
// any) and DestinationURIFileCounts has one entry per destination URI.
func Execute(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationExtract,
        defaultProject string,
) (Result, error) {
        var none Result
        switch {
        case cfg == nil:
                return none, errors.New("extract configuration is required")
        case cfg.SourceTable == nil || cfg.SourceTable.TableID == "":
                return none, errors.New("sourceTable.tableId is required")
        case len(cfg.DestinationURIs) == 0:
                return none, errors.New("destinationUris is required")
        }

        source := cfg.SourceTable
        table := &enginepb.TableRef{
                ProjectId: source.ProjectID,
                DatasetId: source.DatasetID,
                TableId:   source.TableID,
        }
        if table.ProjectId == "" {
                table.ProjectId = defaultProject
        }

        described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: table})
        if err != nil {
                return none, fmt.Errorf("source table: %w", err)
        }
        schema := described.GetSchema()
        rows, err := listAllRows(ctx, catalog, table)
        if err != nil {
                return none, err
        }

        destFormat := cfg.DestinationFormat
        if destFormat == "" {
                destFormat = "CSV"
        }
        body, mime, err := serializeRows(schema, rows, destFormat)
        if err != nil {
                return none, err
        }
        if body, mime, err = maybeGzip(cfg.Compression, body, mime); err != nil {
                return none, err
        }
        fileCounts, err := uploadDestinations(ctx, cfg.DestinationURIs, mime, body)
        if err != nil {
                return none, err
        }
        return Result{
                InputBytes:               int64(len(body)),
                DestinationURIFileCounts: fileCounts,
        }, nil
}

// maybeGzip compresses payload when compression is "GZIP" (compared
// case-insensitively) and reports "application/gzip" as the new content
// type. For any other compression value the payload and contentType are
// passed through unchanged.
func maybeGzip(compression string, payload []byte, contentType string) ([]byte, string, error) {
        if strings.EqualFold(compression, "GZIP") {
                compressed, err := gzipBytes(payload)
                if err != nil {
                        return nil, "", err
                }
                return compressed, "application/gzip", nil
        }
        return payload, contentType, nil
}

// uploadDestinations writes payload to every URI in uris via load.PutGCS,
// stopping at the first failure. On success it returns a slice parallel
// to uris with each entry set to 1 (one object written per URI).
func uploadDestinations(ctx context.Context, uris []string, contentType string, payload []byte) ([]int64, error) {
        fileCounts := make([]int64, len(uris))
        for idx := range uris {
                err := load.PutGCS(ctx, uris[idx], contentType, payload)
                if err != nil {
                        return nil, err
                }
                fileCounts[idx] = 1
        }
        return fileCounts, nil
}

// listAllRows reads every row of ref by issuing ListRows requests of up
// to listPageSize rows each, advancing StartIndex by the number of rows
// received. Paging stops when a page comes back empty or when the
// running offset reaches the TotalRows the engine reported. The first
// RPC error aborts the read.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef,
) ([]*enginepb.DataRow, error) {
        var collected []*enginepb.DataRow
        for offset := int64(0); ; {
                page, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
                        Table:      ref,
                        StartIndex: offset,
                        MaxResults: listPageSize,
                })
                if err != nil {
                        return nil, err
                }
                batch := page.GetRows()
                if len(batch) == 0 {
                        return collected, nil
                }
                collected = append(collected, batch...)
                offset += int64(len(batch))
                if offset >= page.GetTotalRows() {
                        return collected, nil
                }
        }
}

// serializeRows encodes rows in the requested destination format and
// returns the payload together with its content type. format is matched
// case-insensitively; only CSV and NEWLINE_DELIMITED_JSON are supported,
// anything else is an error.
func serializeRows(schema *enginepb.TableSchema, rows []*enginepb.DataRow, format string) ([]byte, string, error) {
        kind := strings.ToUpper(format)
        if kind == "CSV" {
                return serializeCSV(schema, rows)
        }
        if kind == "NEWLINE_DELIMITED_JSON" {
                return serializeNDJSON(schema, rows)
        }
        return nil, "", fmt.Errorf("unsupported destinationFormat %q", format)
}

// serializeCSV renders rows as CSV with a header line of the schema's
// field names. Every record has exactly one column per schema field:
// cells beyond the schema width are ignored and missing cells are left
// as empty strings. The content type returned is "text/csv".
func serializeCSV(schema *enginepb.TableSchema, rows []*enginepb.DataRow) ([]byte, string, error) {
        columns := schema.GetFields()
        width := len(columns)

        var out bytes.Buffer
        cw := csv.NewWriter(&out)

        names := make([]string, 0, width)
        for _, col := range columns {
                names = append(names, col.GetName())
        }
        if err := cw.Write(names); err != nil {
                return nil, "", err
        }

        for _, row := range rows {
                cells := row.GetCells()
                line := make([]string, width)
                for col := 0; col < width && col < len(cells); col++ {
                        line[col] = cellString(cells[col])
                }
                if err := cw.Write(line); err != nil {
                        return nil, "", err
                }
        }

        // Flush reports nothing itself; buffered write errors surface
        // through Error.
        cw.Flush()
        if err := cw.Error(); err != nil {
                return nil, "", err
        }
        return out.Bytes(), "text/csv", nil
}

// serializeNDJSON renders rows as newline-delimited JSON, one object per
// row keyed by schema field name. Fields without a corresponding cell are
// omitted from that row's object; cell values are converted with
// cellJSONValue using the field's declared type. The content type
// returned is "application/json".
func serializeNDJSON(schema *enginepb.TableSchema, rows []*enginepb.DataRow) ([]byte, string, error) {
        columns := schema.GetFields()
        var out bytes.Buffer
        for _, row := range rows {
                cells := row.GetCells()
                record := make(map[string]any, len(columns))
                for idx := 0; idx < len(columns) && idx < len(cells); idx++ {
                        col := columns[idx]
                        record[col.GetName()] = cellJSONValue(cells[idx], col.GetType())
                }
                encoded, err := json.Marshal(record)
                if err != nil {
                        return nil, "", err
                }
                out.Write(encoded)
                out.WriteByte('\n')
        }
        return out.Bytes(), "application/json", nil
}

// cellString returns the cell's string value, or "" for a nil cell or a
// cell flagged as NULL.
func cellString(c *enginepb.Cell) string {
        if c != nil && !c.GetNullValue() {
                return c.GetStringValue()
        }
        return ""
}

// cellJSONValue converts a cell's string value into the Go value that is
// marshalled into NDJSON output, based on the field type typ (matched
// case-insensitively):
//
//   - nil or NULL cells become nil (JSON null);
//   - INTEGER/INT64 become int64 when the text parses as base-10;
//   - FLOAT/FLOAT64/NUMERIC/BIGNUMERIC become float64 when the text parses;
//   - BOOLEAN/BOOL become true only for the exact text "true";
//   - everything else, including unparsable numbers, stays a string.
func cellJSONValue(c *enginepb.Cell, typ string) any {
        if c == nil || c.GetNullValue() {
                return nil
        }
        raw := c.GetStringValue()
        switch kind := strings.ToUpper(typ); kind {
        case "BOOLEAN", "BOOL":
                return raw == "true"
        case "INTEGER", "INT64":
                if parsed, err := strconv.ParseInt(raw, 10, 64); err == nil {
                        return parsed
                }
        case "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC":
                if parsed, err := strconv.ParseFloat(raw, 64); err == nil {
                        return parsed
                }
        }
        // Unknown type or a numeric value that failed to parse: emit the
        // raw text.
        return raw
}

// gzipBytes returns data compressed as a single gzip stream using the
// default compression level. The writer is closed before the buffer is
// read so the gzip trailer is included in the result.
func gzipBytes(data []byte) ([]byte, error) {
        compressed := new(bytes.Buffer)
        w := gzip.NewWriter(compressed)
        _, err := w.Write(data)
        if err == nil {
                err = w.Close()
        }
        if err != nil {
                return nil, err
        }
        return compressed.Bytes(), nil
}

// FormatStatistics converts r into the jobs.ExtractStatistics wire shape,
// rendering InputBytes and every per-URI file count as base-10 strings.
func FormatStatistics(r Result) *jobs.ExtractStatistics {
        stats := &jobs.ExtractStatistics{
                DestinationURIFileCounts: make([]string, 0, len(r.DestinationURIFileCounts)),
                InputBytes:               strconv.FormatInt(r.InputBytes, 10),
        }
        for _, files := range r.DestinationURIFileCounts {
                stats.DestinationURIFileCounts = append(
                        stats.DestinationURIFileCounts,
                        strconv.FormatInt(files, 10),
                )
        }
        return stats
}

// Package gateway runs the BigQuery emulator's REST gateway and manages
// the lifecycle of the C++ engine subprocess.
//
// The flow mirrors cloud-spanner-emulator's gateway:
//
//  1. Optionally spawn the engine binary, wiring its stdout/stderr.
//  2. Wait for the engine's gRPC port to become reachable.
//  3. Start the HTTP server that serves the BigQuery REST API.
//  4. On SIGINT/SIGTERM, shut down both cleanly.
package gateway

import (
        "context"
        "errors"
        "fmt"
        "log/slog"
        "net/http"
        "os"
        "os/exec"
        "os/signal"
        "syscall"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/grpcserver"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
)

// engineReadyTimeout bounds how long Gateway.Run will wait for the engine
// subprocess's gRPC health service to report SERVING before giving up.
// It is applied as the context deadline for the readiness probe in
// connectAndWaitForEngine.
// 30s is generous: a debug build of the engine takes <1s to bind and
// flip to SERVING on a developer laptop, but CI cold-starts and
// container builds sometimes spend 5-10s in linker/loader before main()
// runs.
const engineReadyTimeout = 30 * time.Second

// Options configures the gateway. New stores a copy on the Gateway, and
// the pre-start / post-engine hooks receive that copy by value.
type Options struct {
        // HTTPAddress is the host:port the REST gateway listens on, e.g.
        // "localhost:9050".
        HTTPAddress string

        // EngineAddress is the host:port of the internal C++ engine gRPC
        // server, e.g. "localhost:9061". The Go gateway forwards SQL work
        // and the bqstorage shim's engine client to this address. It is
        // also passed to the engine subprocess as `--host_port`.
        EngineAddress string

        // StorageGRPCAddress is the host:port where the gateway registers
        // the public google.cloud.bigquery.storage.v1 BigQueryRead /
        // BigQueryWrite services, e.g. "localhost:9060". Client libraries
        // dial BIGQUERY_STORAGE_GRPC_ENDPOINT here. Empty disables the
        // public gRPC listener.
        StorageGRPCAddress string

        // EngineBinary is the path to the C++ engine binary. If empty, the
        // gateway runs without an engine (useful early on while the engine
        // is still being scaffolded; queries will return Unimplemented).
        EngineBinary string

        // EngineArgs is the additional flag list passed to the engine
        // subprocess after `--host_port`. Use this to forward
        // `--data_dir` (and any future engine-level flags) from
        // gateway-level CLI flags through to `emulator_main` without
        // the gateway needing to know each flag's semantics.
        EngineArgs []string

        // CopyEngineStdout / CopyEngineStderr connect the engine
        // subprocess's stdout / stderr to the gateway process's own
        // os.Stdout / os.Stderr when true.
        CopyEngineStdout bool
        CopyEngineStderr bool

        // LogRequests prints each REST request and response.
        LogRequests bool

        // DefaultProjectID is the project clients are assumed to be acting
        // against when seeding or other gateway-level operations need a
        // fallback project. Mirrors `--project-id` on gateway_main.
        DefaultProjectID string

        // DefaultDatasetID is the server-level fallback dataset used to
        // resolve unqualified table names when a query/job does not carry
        // its own `defaultDataset`. Mirrors setting `default_dataset` on a
        // production BigQuery client/job. Empty means no fallback (bare
        // table names error, exactly like production with no default set).
        // Mirrors `--dataset` on gateway_main.
        DefaultDatasetID string

        // DefaultDatasetLocation is the BigQuery location used as the
        // fallback when a dataset is created without an explicit location
        // (US, EU, regional). Mirrors `--default-dataset-location`.
        DefaultDatasetLocation string

        // EnableSeedAPI registers `POST /api/emulator/seed` and the
        // matching `GET .../operations/{operationId}` endpoints so a
        // caller can copy live production BigQuery metadata + rows into
        // this emulator. Default false (off) for local safety.
        EnableSeedAPI bool

        // SeedAPIAllowRemote allows non-loopback callers to hit the seed
        // API when true. When false (the default), seed routes refuse
        // any request whose RemoteAddr is not loopback.
        SeedAPIAllowRemote bool

        // SeedAPISeedToken, when non-empty, requires matching header
        // `X-BigQuery-Emulator-Seed-Token` on every seed API request.
        // Loaded from `BIGQUERY_EMULATOR_SEED_TOKEN` when the flag is
        // empty (see binaries/gateway_main).
        SeedAPISeedToken string

        // SeedFiles is the optional list of YAML seed-data file paths
        // the gateway applies after the engine reports SERVING but
        // before it starts accepting public traffic. See
        // gateway/seedfile for the schema.
        SeedFiles []string

        // EnableSQLToolsAPI registers POST /api/emulator/sql/{format,parse,
        // tokenize,complete} for downstream UIs. Off by default.
        EnableSQLToolsAPI bool

        // SQLToolsAPIAllowRemote allows non-loopback callers when true.
        SQLToolsAPIAllowRemote bool

        // SQLToolsAPISeedToken, when non-empty, requires a matching
        // `X-BigQuery-Emulator-SqlTools-Token` header.
        SQLToolsAPISeedToken string

        // DataDir is the persistent storage root the engine uses for
        // the DuckDB catalog + table data. Mirrors `--data-dir`; the
        // gateway passes it through via `--data_dir` in EngineArgs.
        DataDir string

        // InitialDataDir is an optional template directory the gateway
        // copies into DataDir on startup when DataDir does not yet
        // contain an initialized catalog (`catalog.duckdb` missing).
        // Mirrors `--initial-data-dir` on gateway_main.
        InitialDataDir string

        // Debug enables verbose request and lifecycle logging.
        Debug bool

        // Logger is the structured logger the gateway emits lifecycle and
        // request events to. When nil, the gateway logs to a discard
        // handler so callers that want silent embedding (unit tests, the
        // shallow-emulator harness) get zero output without having to
        // build their own no-op logger. Production binaries (see
        // binaries/gateway_main) wire a real *slog.Logger here so the
        // emulator's structured logs surface in stderr / stackdriver.
        Logger *slog.Logger
}

// Gateway is the top-level BigQuery emulator gateway. Build one with New,
// optionally install hooks with WithPreStartHook / WithPostEngineHook,
// then call Run.
type Gateway struct {
        // opts is the configuration captured by New. logger is
        // opts.Logger, or a discard logger when that was nil. engine is
        // the spawned engine subprocess (nil until startEngine launches
        // one) and engineDone is closed once that subprocess's Wait has
        // returned.
        opts       Options
        logger     *slog.Logger
        engine     *exec.Cmd
        engineDone chan struct{}

        // engineClient is the long-lived gRPC channel to the engine
        // subprocess. nil when EngineBinary is empty (gateway-only stub mode).
        engineClient *engine.Client

        // preStartHook runs once just before the engine subprocess is
        // spawned. Use it for filesystem prep that must complete before
        // the engine touches DataDir (e.g. materializing a template tree
        // into an empty data directory).
        preStartHook func(Options) error

        // postEngineHook runs once after the engine reports SERVING but
        // before the gateway begins serving HTTP traffic. Use it for
        // startup-time seeding from YAML files that needs the
        // CatalogClient to be reachable.
        postEngineHook func(Options, *engine.Client) error

        // storageGRPC is the public BigQuery Storage listener (nil when
        // StorageGRPCAddress is empty).
        storageGRPC *grpcserver.Server
}

// New builds a Gateway from opts without starting anything; call Run to
// launch the engine and servers. When opts.Logger is nil the gateway
// logs to a discard handler.
func New(opts Options) *Gateway {
        g := &Gateway{opts: opts, logger: opts.Logger}
        if g.logger == nil {
                g.logger = slog.New(slog.DiscardHandler)
        }
        return g
}

// WithPreStartHook installs a callback executed once before the engine
// subprocess is spawned. The hook runs synchronously on the Run
// goroutine and a non-nil error aborts startup without touching the
// engine. Any previously installed pre-start hook is replaced. The
// receiver is returned so calls can be chained.
func (g *Gateway) WithPreStartHook(hook func(Options) error) *Gateway {
        g.preStartHook = hook
        return g
}

// WithPostEngineHook installs a callback executed once after the
// engine reports SERVING but before the HTTP gateway accepts traffic.
// The hook receives the long-lived *engine.Client so it can use the
// CatalogClient / QueryClient to mutate state (e.g. apply YAML seed
// files). The client is nil when no engine binary is configured. A
// non-nil error from the hook tears down the engine and aborts Run.
// Any previously installed post-engine hook is replaced. The receiver
// is returned so calls can be chained.
func (g *Gateway) WithPostEngineHook(hook func(Options, *engine.Client) error) *Gateway {
        g.postEngineHook = hook
        return g
}

// Run starts the engine subprocess (if configured), the optional public
// storage gRPC listener and the HTTP server, then blocks until the HTTP
// server fails or SIGINT/SIGTERM arrives.
//
// Startup order:
//
//  1. preStartHook (if installed); an error aborts before the engine is
//     touched.
//  2. startEngine, which spawns the engine and waits for readiness.
//  3. postEngineHook (if installed); an error stops the engine.
//  4. The storage gRPC listener; an error stops the engine.
//  5. The HTTP server, served on its own goroutine.
//
// An engine subprocess that exits on its own after startup is logged by
// startEngine's wait goroutine but does not cause Run to return.
//
// Run returns the HTTP server's error if it stops unexpectedly, or nil
// after a signal-triggered shutdown. The signal handlers registered here
// are removed again before Run returns, so the process's default
// SIGINT/SIGTERM behavior is restored for the caller.
func (g *Gateway) Run() error {
        ctx := context.Background()
        if g.preStartHook != nil {
                if err := g.preStartHook(g.opts); err != nil {
                        return fmt.Errorf("pre-start hook: %w", err)
                }
        }

        if err := g.startEngine(ctx); err != nil {
                return fmt.Errorf("start engine: %w", err)
        }

        if g.postEngineHook != nil {
                if err := g.postEngineHook(g.opts, g.engineClient); err != nil {
                        g.stopEngine()
                        return fmt.Errorf("post-engine hook: %w", err)
                }
        }

        deps := handlers.BuildDependenciesWith(g.engineClient, handlers.DepsOptions{
                DataDir:          g.opts.DataDir,
                DefaultDatasetID: g.opts.DefaultDatasetID,
        })

        if err := g.startStorageGRPC(ctx, deps); err != nil {
                g.stopEngine()
                return fmt.Errorf("start storage grpc: %w", err)
        }

        srv := &http.Server{
                Addr:              g.opts.HTTPAddress,
                Handler:           NewServer(g.opts, deps, g.engineClient),
                ReadHeaderTimeout: 10 * time.Second,
        }

        // Buffered so the serving goroutine can always deliver its result
        // and exit, even when shutdown was triggered by a signal and
        // nobody reads the channel afterwards.
        errCh := make(chan error, 1)
        go func() {
                g.logStartupExpectations(ctx)
                err := srv.ListenAndServe()
                if errors.Is(err, http.ErrServerClosed) {
                        // Expected after srv.Shutdown; not a failure.
                        err = nil
                }
                errCh <- err
        }()

        sigCh := make(chan os.Signal, 1)
        signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)
        // Without Stop, SIGINT/SIGTERM would keep being diverted into
        // sigCh after Run returns, and an embedding process could no
        // longer be interrupted.
        defer signal.Stop(sigCh)

        return g.waitForShutdown(ctx, srv, errCh, sigCh)
}

// startEngine spawns the C++ engine subprocess if one is configured and
// waits for it to come up. It is a no-op when EngineBinary is empty.
//
// The subprocess is launched with `--host_port <EngineAddress>` followed
// by EngineArgs, and its stdout/stderr are forwarded when the
// corresponding CopyEngine* option is set. A background goroutine reaps
// the process, closes engineDone and logs a warning when the process
// exited with an error.
//
// If the engine never becomes ready, the subprocess is stopped before
// the error is returned so a failed startup does not leave an orphaned
// engine process behind.
func (g *Gateway) startEngine(ctx context.Context) error {
        if g.opts.EngineBinary == "" {
                return nil
        }

        args := []string{
                "--host_port", g.opts.EngineAddress,
        }
        args = append(args, g.opts.EngineArgs...)
        // #nosec G204 -- engine binary path is operator-supplied via
        // --engine_binary.
        cmd := exec.Command(g.opts.EngineBinary, args...)
        if g.opts.CopyEngineStdout {
                cmd.Stdout = os.Stdout
        }
        if g.opts.CopyEngineStderr {
                cmd.Stderr = os.Stderr
        }
        if err := cmd.Start(); err != nil {
                return fmt.Errorf("start %s: %w", g.opts.EngineBinary, err)
        }

        // The goroutine closes its own captured channel rather than
        // re-reading the struct field from another goroutine.
        done := make(chan struct{})
        g.engine = cmd
        g.engineDone = done

        go func() {
                err := cmd.Wait()
                close(done)
                if err != nil {
                        g.logger.WarnContext(ctx, "engine subprocess exited",
                                slog.Any("err", err))
                }
        }()

        if err := g.connectAndWaitForEngine(ctx); err != nil {
                // The process was started but never became usable; stop
                // it here because the caller treats this as "engine not
                // started" and performs no cleanup of its own.
                g.stopEngine()
                return err
        }
        return nil
}

// connectAndWaitForEngine dials the engine's gRPC address and blocks in
// the client's WaitForReady until the engine is ready or
// engineReadyTimeout elapses, so the gateway's HTTP listener is not
// started before the engine can answer.
//
// On success the live *engine.Client is stored on the receiver for the
// lifetime of the gateway; stopEngine closes it. On a readiness failure
// the client is closed before the error is returned.
func (g *Gateway) connectAndWaitForEngine(ctx context.Context) error {
        addr := g.opts.EngineAddress

        client, err := engine.Dial(addr)
        if err != nil {
                return fmt.Errorf("dial engine at %s: %w", addr, err)
        }

        readyCtx, cancel := context.WithTimeout(ctx, engineReadyTimeout)
        defer cancel()

        waitErr := client.WaitForReady(readyCtx)
        if waitErr != nil {
                _ = client.Close()
                return fmt.Errorf("wait for engine ready at %s: %w", addr, waitErr)
        }

        g.engineClient = client
        g.logger.InfoContext(ctx, "engine grpc serving",
                slog.String("addr", addr))
        return nil
}

// stopStorageGRPC closes the public storage gRPC server, if one was
// started, and clears the field so repeated calls are no-ops. The Close
// error is deliberately ignored: this only runs during teardown.
func (g *Gateway) stopStorageGRPC() {
        srv := g.storageGRPC
        if srv == nil {
                return
        }
        _ = srv.Close()
        g.storageGRPC = nil
}

// stopEngine tears down the engine connection and subprocess. It closes
// the gRPC client (if any), then sends the subprocess an interrupt and
// waits up to five seconds for it to exit; after that it kills the
// process and waits for it to be reaped. It is a no-op for the process
// part when no subprocess was started.
func (g *Gateway) stopEngine() {
        if client := g.engineClient; client != nil {
                _ = client.Close()
                g.engineClient = nil
        }

        cmd := g.engine
        if cmd == nil || cmd.Process == nil {
                return
        }
        proc := cmd.Process

        // Best effort: the signal fails harmlessly when the process has
        // already exited, in which case engineDone is already closed.
        _ = proc.Signal(os.Interrupt)

        grace := time.NewTimer(5 * time.Second)
        defer grace.Stop()
        select {
        case <-g.engineDone:
                return
        case <-grace.C:
        }

        _ = proc.Kill()
        <-g.engineDone
}

package gateway

import (
        "context"
        "log/slog"
        "net/http"
        "os"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/grpcserver"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
)

// startStorageGRPC binds the public storage gRPC listener on
// StorageGRPCAddress and serves it on a background goroutine. It does
// nothing when the address is empty. A bind/registration failure is
// returned to the caller; an error from the serving loop is only logged.
func (g *Gateway) startStorageGRPC(ctx context.Context, deps handlers.Dependencies) error {
        addr := g.opts.StorageGRPCAddress
        if addr == "" {
                return nil
        }

        server, err := grpcserver.Start(addr, g.engineClient, deps)
        if err != nil {
                return err
        }
        g.storageGRPC = server

        go func() {
                serveErr := server.Serve()
                if serveErr == nil {
                        return
                }
                g.logger.WarnContext(ctx, "storage grpc server exited", slog.Any("err", serveErr))
        }()

        g.logger.InfoContext(ctx, "storage grpc listening",
                slog.String("addr", addr))
        return nil
}

// waitForShutdown blocks until the HTTP server goroutine reports a result
// on errCh or a signal arrives on sigCh, then stops the storage gRPC
// server and the engine.
//
// When errCh fires first, its value is returned as-is. When a signal
// fires first, the HTTP server is given up to five seconds to shut down
// gracefully (the Shutdown error is ignored) and nil is returned.
func (g *Gateway) waitForShutdown(
        ctx context.Context,
        srv *http.Server,
        errCh <-chan error,
        sigCh <-chan os.Signal,
) error {
        var result error
        select {
        case result = <-errCh:
                // The HTTP server already stopped; nothing to drain.
        case sig := <-sigCh:
                g.logger.InfoContext(ctx, "shutting down on signal",
                        slog.String("signal", sig.String()))
                shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
                defer cancel()
                _ = srv.Shutdown(shutdownCtx)
        }

        g.stopStorageGRPC()
        g.stopEngine()
        return result
}

// logStartupExpectations emits the startup log lines: the HTTP address,
// and then either the engine (and, if configured, public storage gRPC)
// addresses, or a notice that the engine subprocess is disabled.
func (g *Gateway) logStartupExpectations(ctx context.Context) {
        log := g.logger
        log.InfoContext(ctx, "gateway listening",
                slog.String("addr", g.opts.HTTPAddress))

        if g.opts.EngineBinary == "" {
                log.InfoContext(ctx, "engine subprocess disabled; query routes will return Unimplemented")
                return
        }

        log.InfoContext(ctx, "engine grpc expected",
                slog.String("addr", g.opts.EngineAddress))
        if storageAddr := g.opts.StorageGRPCAddress; storageAddr != "" {
                log.InfoContext(ctx, "public storage grpc expected",
                        slog.String("addr", storageAddr))
        }
}

package grpcserver

import (
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers/bqanalyticshub"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers/bqconnection"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers/bqreservation"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers/bqstorage"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers/bqv2grpc"
        "google.golang.org/grpc"
)

// RegisterAll registers every public gRPC surface the gateway exposes on
// the storage listener (BigQuery Storage Read/Write, Connection,
// Reservation, Analytics Hub, and the BigQuery v2 resource services)
// onto srv. A nil srv is ignored.
func RegisterAll(srv grpc.ServiceRegistrar, eng *engine.Client, deps handlers.Dependencies) {
        if srv != nil {
                bqstorage.RegisterGRPC(srv, eng)
                bqconnection.RegisterGRPC(srv, deps)
                bqreservation.RegisterGRPC(srv)
                bqanalyticshub.RegisterGRPC(srv)
                bqv2grpc.RegisterGRPC(srv, deps)
        }
}

// Package grpcserver hosts the public BigQuery Storage gRPC surface on the
// gateway process. Official client libraries dial
// google.cloud.bigquery.storage.v1.BigQueryRead / BigQueryWrite; the shim
// in gateway/handlers/bqstorage adapts those RPCs to the engine's internal
// bigquery_emulator.v1.StorageRead / StorageWrite contracts.
package grpcserver

import (
        "errors"
        "fmt"
        "net"

        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "google.golang.org/grpc"
        "google.golang.org/grpc/credentials/insecure"
)

// Server wraps the public Storage gRPC listener the gateway owns.
// srv is the grpc.Server that Start creates and populates via
// RegisterAll; lis is the TCP listener Start bound and that Serve hands
// to srv.
type Server struct {
        srv *grpc.Server
        lis net.Listener
}

// Start listens on address (TCP) and builds a gRPC server with every
// public surface the gateway exposes registered on it (Storage,
// Connection, Reservation, Analytics Hub, BigQuery v2). It does not
// begin serving; call Serve on the result.
// eng may be nil in gateway-only mode; storage RPCs then return UNAVAILABLE.
func Start(address string, eng *engine.Client, deps handlers.Dependencies) (*Server, error) {
        if address == "" {
                return nil, errors.New("grpcserver: empty address")
        }

        listener, err := net.Listen("tcp", address)
        if err != nil {
                return nil, fmt.Errorf("grpcserver: listen %s: %w", address, err)
        }

        server := &Server{
                srv: grpc.NewServer(grpc.Creds(insecure.NewCredentials())),
                lis: listener,
        }
        RegisterAll(server.srv, eng, deps)
        return server, nil
}

// Serve runs the gRPC server on the listener bound by Start and blocks
// until the server stops or the listener fails. It returns an error
// immediately when the Server was not built by Start.
func (s *Server) Serve() error {
        initialized := s != nil && s.srv != nil && s.lis != nil
        if !initialized {
                return errors.New("grpcserver: server not initialized")
        }
        return s.srv.Serve(s.lis)
}

// Stop gracefully shuts down the gRPC server via GracefulStop. It is
// safe to call on a nil or uninitialized Server.
func (s *Server) Stop() {
        if s != nil && s.srv != nil {
                s.srv.GracefulStop()
        }
}

// Close stops the server and closes the listener. It is safe to call on
// a nil Server.
//
// Stopping a gRPC server already closes any listener that was passed to
// Serve, so the explicit listener close below matters only when Serve
// was never reached. In the common case that second close reports
// net.ErrClosed, which is treated as success here so callers do not see
// a spurious error after a clean shutdown. Any other close error is
// returned.
func (s *Server) Close() error {
        if s == nil {
                return nil
        }
        s.Stop()
        if s.lis == nil {
                return nil
        }
        if err := s.lis.Close(); err != nil && !errors.Is(err, net.ErrClosed) {
                return err
        }
        return nil
}

package handlers

import (
        "net/http"
        "regexp"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
)

// abortSessionRE recognizes the `CALL BQ.ABORT_SESSION([session_id])`
// system procedure call bigframes issues when closing a session. The
// match is case-insensitive; the parenthesized argument list is
// optional and may be empty, a single-quoted string, or SESSION_ID();
// one trailing semicolon is allowed.
var abortSessionRE = regexp.MustCompile(
        `(?is)^\s*CALL\s+BQ\.ABORT_SESSION\s*(?:\(\s*(?:'([^']*)'|SESSION_ID\s*\(\s*\))?\s*\))?\s*;?\s*$`,
)

// parseAbortSessionSQL reports whether sql, ignoring surrounding
// whitespace, is exactly a BQ.ABORT_SESSION call.
func parseAbortSessionSQL(sql string) bool {
        trimmed := strings.TrimSpace(sql)
        return abortSessionRE.MatchString(trimmed)
}

// handleAbortSessionQuery answers a BQ.ABORT_SESSION call without
// executing anything, so bigframes session teardown succeeds against the
// emulator. It records a completed query job with an empty result whose
// start and end times are both "now" (UTC), stamps the resolved session
// info onto the job, and writes the query response as JSON with HTTP 200.
func handleAbortSessionQuery(
        deps Dependencies,
        w http.ResponseWriter,
        projectID, location string,
        connProps []bqtypes.ConnectionProperty,
) {
        // The stub does no work, so the job starts and ends at the same
        // instant.
        now := time.Now().UTC()
        sessionInfo := sessionStore(&deps).Resolve(projectID, location, false, connProps)

        job := deps.Jobs.CompleteQueryWithResult(projectID, location, 0, now, now, &jobs.QueryResult{})
        stampJobSessionInfo(job, sessionInfo)

        response := assembleQueryResponse(job, nil, nil, nil, nil, "", "", nil, nil, sessionInfo)
        writeJSON(w, http.StatusOK, response)
}

package bqanalyticshub

import (
        "context"
        "sync"

        "cloud.google.com/go/bigquery/analyticshub/apiv1/analyticshubpb"
        "google.golang.org/grpc"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
        "google.golang.org/protobuf/types/known/emptypb"
)

// Server implements the Analytics Hub gRPC surface with in-memory storage.
// It embeds UnimplementedAnalyticsHubServiceServer, so RPCs without an
// explicit method here fall through to that embedded implementation.
// The zero value is ready to use; RegisterGRPC registers a fresh &Server{}.
type Server struct {
        analyticshubpb.UnimplementedAnalyticsHubServiceServer
        exchanges sync.Map // name string -> *analyticshubpb.DataExchange
        listings  sync.Map // name string -> *analyticshubpb.Listing
}

// RegisterGRPC registers a fresh in-memory AnalyticsHubService implementation
// on srv. A nil registrar is ignored.
func RegisterGRPC(srv grpc.ServiceRegistrar) {
        if srv != nil {
                analyticshubpb.RegisterAnalyticsHubServiceServer(srv, &Server{})
        }
}

// CreateDataExchange stores a new data exchange named
// "<parent>/dataExchanges/<data_exchange_id>". Only the display name and
// description are copied from the request payload. It returns
// InvalidArgument when parent or data_exchange_id is missing and
// AlreadyExists when the name is taken.
func (s *Server) CreateDataExchange(
        _ context.Context,
        req *analyticshubpb.CreateDataExchangeRequest,
) (*analyticshubpb.DataExchange, error) {
        switch {
        case req == nil, req.GetParent() == "", req.GetDataExchangeId() == "":
                return nil, status.Error(codes.InvalidArgument, "parent and data_exchange_id are required")
        }

        name := req.GetParent() + "/dataExchanges/" + req.GetDataExchangeId()

        // A missing payload is treated as an empty one.
        payload := req.GetDataExchange()
        if payload == nil {
                payload = &analyticshubpb.DataExchange{}
        }
        exchange := &analyticshubpb.DataExchange{
                Name:        name,
                DisplayName: payload.GetDisplayName(),
                Description: payload.GetDescription(),
        }

        // LoadOrStore makes the existence check and the insert one atomic step.
        _, exists := s.exchanges.LoadOrStore(name, exchange)
        if exists {
                return nil, status.Errorf(codes.AlreadyExists, "DataExchange %s already exists", name)
        }
        return exchange, nil
}

// GetDataExchange looks up a data exchange by its full resource name. It
// returns InvalidArgument when the name is empty and NotFound when nothing
// is stored under it.
func (s *Server) GetDataExchange(
        _ context.Context,
        req *analyticshubpb.GetDataExchangeRequest,
) (*analyticshubpb.DataExchange, error) {
        if req == nil || req.GetName() == "" {
                return nil, status.Error(codes.InvalidArgument, "name is required")
        }
        name := req.GetName()
        stored, found := s.exchanges.Load(name)
        if !found {
                return nil, status.Errorf(codes.NotFound, "DataExchange %s not found", name)
        }
        exchange, _ := stored.(*analyticshubpb.DataExchange)
        return exchange, nil
}

// DeleteDataExchange removes the named data exchange and every listing whose
// name starts with "<name>/listings/". It returns InvalidArgument when the
// name is empty and NotFound when the exchange does not exist.
func (s *Server) DeleteDataExchange(
        _ context.Context,
        req *analyticshubpb.DeleteDataExchangeRequest,
) (*emptypb.Empty, error) {
        if req == nil || req.GetName() == "" {
                return nil, status.Error(codes.InvalidArgument, "name is required")
        }
        name := req.GetName()
        if _, existed := s.exchanges.LoadAndDelete(name); !existed {
                return nil, status.Errorf(codes.NotFound, "DataExchange %s not found", name)
        }

        listingPrefix := name + "/listings/"
        s.listings.Range(func(key, _ any) bool {
                listingName, isString := key.(string)
                // Only keys strictly longer than the prefix can be child listings.
                if !isString || len(listingName) <= len(listingPrefix) {
                        return true
                }
                if listingName[:len(listingPrefix)] == listingPrefix {
                        s.listings.Delete(listingName)
                }
                return true
        })
        return &emptypb.Empty{}, nil
}

// CreateListing stores a new listing named "<parent>/listings/<listing_id>"
// under an existing data exchange. Display name, description and source are
// copied from the request payload. It returns InvalidArgument when parent or
// listing_id is missing, NotFound when the parent exchange does not exist,
// and AlreadyExists when the listing name is taken.
func (s *Server) CreateListing(
        _ context.Context,
        req *analyticshubpb.CreateListingRequest,
) (*analyticshubpb.Listing, error) {
        switch {
        case req == nil, req.GetParent() == "", req.GetListingId() == "":
                return nil, status.Error(codes.InvalidArgument, "parent and listing_id are required")
        }

        parent := req.GetParent()
        if _, found := s.exchanges.Load(parent); !found {
                return nil, status.Errorf(codes.NotFound, "DataExchange %s not found", parent)
        }

        name := parent + "/listings/" + req.GetListingId()

        // A missing payload is treated as an empty one.
        payload := req.GetListing()
        if payload == nil {
                payload = &analyticshubpb.Listing{}
        }
        listing := &analyticshubpb.Listing{
                Name:        name,
                DisplayName: payload.GetDisplayName(),
                Description: payload.GetDescription(),
                Source:      payload.GetSource(),
        }

        _, exists := s.listings.LoadOrStore(name, listing)
        if exists {
                return nil, status.Errorf(codes.AlreadyExists, "Listing %s already exists", name)
        }
        return listing, nil
}

package bqconnection

import (
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
)

// FixtureAnnotation is the marker stored in Connection.description when a
// connection is created in fixture mode, so EXTERNAL_QUERY resolution can
// locate snapshot data. AnnotateFixtureDescription is the helper that
// applies it.
const FixtureAnnotation = "bqemu:fixture"

// ModeForConnection returns the mode cfg resolves for the connection resource
// name. Without a config (cfg == nil) it defaults to fixture mode.
func ModeForConnection(cfg *sourceconfig.Config, name string) sourceconfig.Mode {
        if cfg != nil {
                return cfg.ResolveConnection(name)
        }
        return sourceconfig.ModeFixture
}

// AnnotateFixtureDescription returns description with FixtureAnnotation
// prepended when the connection resolves to fixture mode. The description is
// returned untouched when the mode is not fixture or when it already
// contains the annotation anywhere; an empty description becomes the bare
// annotation.
func AnnotateFixtureDescription(cfg *sourceconfig.Config, name, description string) string {
        switch {
        case ModeForConnection(cfg, name) != sourceconfig.ModeFixture:
                return description
        case strings.Contains(description, FixtureAnnotation):
                return description
        case description == "":
                return FixtureAnnotation
        default:
                return FixtureAnnotation + " " + description
        }
}

package bqconnection

import (
        "context"
        "strings"

        "cloud.google.com/go/bigquery/connection/apiv1/connectionpb"
        "github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "google.golang.org/grpc"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
        "google.golang.org/protobuf/types/known/emptypb"
        "google.golang.org/protobuf/types/known/fieldmaskpb"
)

// Server implements the BigQuery Connection API gRPC surface. RPCs that are
// not overridden fall through to the embedded
// UnimplementedConnectionServiceServer.
type Server struct {
        connectionpb.UnimplementedConnectionServiceServer
        store *Store               // connection records, keyed by resource name
        cfg   *sourceconfig.Config // external-source config; consulted when annotating descriptions
}

// RegisterGRPC opens the connection store described by deps.ExternalSources
// and registers a ConnectionService backed by it on srv. A nil registrar is
// ignored.
func RegisterGRPC(srv grpc.ServiceRegistrar, deps handlers.Dependencies) {
        if srv == nil {
                return
        }
        cfg := deps.ExternalSources
        store, err := OpenStore(cfg)
        if err != nil {
                // NOTE(review): the signature has no error return, so a store
                // that fails to open leaves the service unregistered without
                // any report to the caller.
                return
        }
        connectionpb.RegisterConnectionServiceServer(srv, &Server{store: store, cfg: cfg})
}

// ListConnections returns every stored connection under req.Parent in a
// single response. It returns InvalidArgument when parent is empty.
func (s *Server) ListConnections(
        _ context.Context,
        req *connectionpb.ListConnectionsRequest,
) (*connectionpb.ListConnectionsResponse, error) {
        if req == nil || req.GetParent() == "" {
                return nil, status.Error(codes.InvalidArgument, "parent is required")
        }
        resp := &connectionpb.ListConnectionsResponse{}
        resp.Connections = s.store.List(req.GetParent())
        return resp, nil
}

// CreateConnection stores a new connection named
// "<parent>/connections/<connection_id>" and returns the stored record. The
// request payload is cloned, its description is passed through
// AnnotateFixtureDescription, and the result is written to the store. It
// returns InvalidArgument when parent or connection_id is missing,
// AlreadyExists when the name is taken, and Internal when cloning or
// persisting fails.
func (s *Server) CreateConnection(
        _ context.Context,
        req *connectionpb.CreateConnectionRequest,
) (*connectionpb.Connection, error) {
        if req == nil || req.GetParent() == "" || req.GetConnectionId() == "" {
                return nil, status.Error(codes.InvalidArgument, "parent and connection_id are required")
        }

        name := req.GetParent() + "/connections/" + req.GetConnectionId()
        if _, exists := s.store.Get(name); exists {
                return nil, status.Errorf(codes.AlreadyExists, "Connection %s already exists", name)
        }

        // A missing payload is treated as an empty connection.
        src := req.GetConnection()
        if src == nil {
                src = &connectionpb.Connection{}
        }

        created, cloneErr := CloneConnection(src)
        if cloneErr != nil {
                return nil, status.Errorf(codes.Internal, "clone connection: %v", cloneErr)
        }
        created.Name = name
        created.FriendlyName = src.GetFriendlyName()
        created.Description = AnnotateFixtureDescription(s.cfg, name, src.GetDescription())
        copyConnectionProperties(created, src)

        if putErr := s.store.Put(created); putErr != nil {
                return nil, status.Errorf(codes.Internal, "persist connection: %v", putErr)
        }
        return created, nil
}

// GetConnection looks up a connection by its full resource name. It returns
// InvalidArgument when the name is empty and NotFound when the store has no
// such record.
func (s *Server) GetConnection(
        _ context.Context,
        req *connectionpb.GetConnectionRequest,
) (*connectionpb.Connection, error) {
        if req == nil || req.GetName() == "" {
                return nil, status.Error(codes.InvalidArgument, "name is required")
        }
        name := req.GetName()
        if stored, found := s.store.Get(name); found {
                return stored, nil
        }
        return nil, status.Errorf(codes.NotFound, "Connection %s not found", name)
}

// UpdateConnection applies req.Connection to a clone of the stored record
// according to req.UpdateMask, persists the clone and returns it. It returns
// InvalidArgument when the name or the connection payload is missing,
// NotFound when the record does not exist, and Internal when cloning or
// persisting fails.
func (s *Server) UpdateConnection(
        _ context.Context,
        req *connectionpb.UpdateConnectionRequest,
) (*connectionpb.Connection, error) {
        if req == nil || req.GetName() == "" {
                return nil, status.Error(codes.InvalidArgument, "name is required")
        }
        name := req.GetName()

        current, found := s.store.Get(name)
        if !found {
                return nil, status.Errorf(codes.NotFound, "Connection %s not found", name)
        }

        changes := req.GetConnection()
        if changes == nil {
                return nil, status.Error(codes.InvalidArgument, "connection is required")
        }

        // Work on a copy so the stored record is only replaced by Put.
        updated, cloneErr := CloneConnection(current)
        if cloneErr != nil {
                return nil, status.Errorf(codes.Internal, "clone connection: %v", cloneErr)
        }
        applyConnectionUpdateMask(updated, changes, req.GetUpdateMask())

        if putErr := s.store.Put(updated); putErr != nil {
                return nil, status.Errorf(codes.Internal, "persist connection: %v", putErr)
        }
        return updated, nil
}

// DeleteConnection removes the named connection from the store. It returns
// InvalidArgument when the name is empty, NotFound when the record does not
// exist, and Internal when the store fails to delete it.
func (s *Server) DeleteConnection(
        _ context.Context,
        req *connectionpb.DeleteConnectionRequest,
) (*emptypb.Empty, error) {
        if req == nil || req.GetName() == "" {
                return nil, status.Error(codes.InvalidArgument, "name is required")
        }
        name := req.GetName()
        if _, found := s.store.Get(name); !found {
                return nil, status.Errorf(codes.NotFound, "Connection %s not found", name)
        }
        if delErr := s.store.Delete(name); delErr != nil {
                return nil, status.Errorf(codes.Internal, "delete connection: %v", delErr)
        }
        return &emptypb.Empty{}, nil
}

// applyConnectionUpdateMask copies fields from patch into dst as selected by
// mask. It does nothing when dst or patch is nil.
//
// With an empty (or nil) mask, non-empty FriendlyName and Description values
// and whichever properties variant patch carries are copied. With a
// non-empty mask, each path is matched case-insensitively (snake_case or its
// underscore-free spelling): friendly_name and description are overwritten
// unconditionally, even with an empty value, while a properties path only
// replaces dst.Properties when patch actually carries that variant.
// Unrecognized paths are ignored.
func applyConnectionUpdateMask(dst, patch *connectionpb.Connection, mask *fieldmaskpb.FieldMask) {
        if dst == nil || patch == nil {
                return
        }
        paths := mask.GetPaths()
        if len(paths) == 0 {
                if patch.FriendlyName != "" {
                        dst.FriendlyName = patch.FriendlyName
                }
                if patch.Description != "" {
                        dst.Description = patch.Description
                }
                copyConnectionProperties(dst, patch)
                return
        }
        for _, p := range paths {
                switch strings.TrimSpace(strings.ToLower(p)) {
                case "friendly_name", "friendlyname":
                        dst.FriendlyName = patch.FriendlyName
                case "description":
                        dst.Description = patch.Description
                case "cloud_sql", "cloudsql":
                        if patch.GetCloudSql() != nil {
                                dst.Properties = &connectionpb.Connection_CloudSql{CloudSql: patch.GetCloudSql()}
                        }
                case "cloud_spanner", "cloudspanner":
                        if patch.GetCloudSpanner() != nil {
                                dst.Properties = &connectionpb.Connection_CloudSpanner{CloudSpanner: patch.GetCloudSpanner()}
                        }
                case "aws":
                        if patch.GetAws() != nil {
                                dst.Properties = &connectionpb.Connection_Aws{Aws: patch.GetAws()}
                        }
                case "azure":
                        if patch.GetAzure() != nil {
                                dst.Properties = &connectionpb.Connection_Azure{Azure: patch.GetAzure()}
                        }
                case "cloud_resource", "cloudresource":
                        if patch.GetCloudResource() != nil {
                                dst.Properties = &connectionpb.Connection_CloudResource{CloudResource: patch.GetCloudResource()}
                        }
                case "spark":
                        if patch.GetSpark() != nil {
                                dst.Properties = &connectionpb.Connection_Spark{Spark: patch.GetSpark()}
                        }
                case "salesforce_data_cloud", "salesforcedatacloud":
                        // Keeps the masked path in step with copyConnectionProperties,
                        // which already handles this variant on the unmasked path.
                        if patch.GetSalesforceDataCloud() != nil {
                                dst.Properties = &connectionpb.Connection_SalesforceDataCloud{
                                        SalesforceDataCloud: patch.GetSalesforceDataCloud(),
                                }
                        }
                }
        }
}

// copyConnectionProperties sets dst.Properties to a new oneof wrapper around
// the same properties message src carries. The inner message is shared, not
// deep-copied. dst is left untouched when either argument is nil or when src
// carries no recognized properties variant.
func copyConnectionProperties(dst, src *connectionpb.Connection) {
        if src == nil || dst == nil {
                return
        }
        switch variant := src.Properties.(type) {
        case *connectionpb.Connection_CloudSql:
                dst.Properties = &connectionpb.Connection_CloudSql{CloudSql: variant.CloudSql}
        case *connectionpb.Connection_CloudSpanner:
                dst.Properties = &connectionpb.Connection_CloudSpanner{CloudSpanner: variant.CloudSpanner}
        case *connectionpb.Connection_CloudResource:
                dst.Properties = &connectionpb.Connection_CloudResource{CloudResource: variant.CloudResource}
        case *connectionpb.Connection_Aws:
                dst.Properties = &connectionpb.Connection_Aws{Aws: variant.Aws}
        case *connectionpb.Connection_Azure:
                dst.Properties = &connectionpb.Connection_Azure{Azure: variant.Azure}
        case *connectionpb.Connection_Spark:
                dst.Properties = &connectionpb.Connection_Spark{Spark: variant.Spark}
        case *connectionpb.Connection_SalesforceDataCloud:
                dst.Properties = &connectionpb.Connection_SalesforceDataCloud{SalesforceDataCloud: variant.SalesforceDataCloud}
        }
}

// Package bqconnection is the shallow-emulator skeleton for the
// BigQuery Connection API surface (gRPC, exposed at the storage gRPC
// port per docker-compose.yml). The gRPC layer is intentionally NOT
// registered in this skeleton because doing so would require:
//
//  1. Adding `cloud.google.com/go/bigquery/connection/apiv1/connectionpb`
//     and the associated `cloud.google.com/go/iam/apiv1/iampb` Go
//     dependencies, which transitively pull ~30 packages this repo
//     does not currently link.
//  2. Building a connection-record storage layer (this repo's
//     `backend/catalog/` is C++ and does not yet model connection
//     records).
//
// Both are explicitly larger than the shallow-emulator port budget
// per `docs/ENGINE_POLICY.md`.
// The surface-mapping table below documents which failing-IT each
// intended handler symbol satisfies, so follow-up ports use a
// one-to-one mapping rather than a free-form rebuild.
//
// Failing-IT → intended handler mapping (shallow-emulator intake table):
//
//        CreateAwsConnectionIT  → connectionpb.ConnectionService.CreateConnection
//                                  ⇒ gateway/handlers/bqconnection/server.go: CreateConnection
//                                  ⇒ gateway/handlers/bqconnection/rest_handler.go (HTTP/JSON variant)
//                                  ⇒ gateway/handlers/bqconnection/connection_properties.go: applyCloudSQLFromCreate,
//                                     validateConnectionPropertiesOneof
//
//        DeleteConnectionIT     → connectionpb.ConnectionService.DeleteConnection
//                                  ⇒ gateway/handlers/bqconnection/server.go: DeleteConnection
//        GetConnectionIT        → connectionpb.ConnectionService.GetConnection
//                                  ⇒ gateway/handlers/bqconnection/server.go: GetConnection
//        ShareConnectionIT      → connectionpb.ConnectionService.{GetIamPolicy,SetIamPolicy}
//                                  ⇒ gateway/handlers/bqconnection/server.go: {GetIamPolicy,SetIamPolicy}
//                                    (currently UNIMPLEMENTED — IT will fail-fast)
//        UpdateConnectionIT     → connectionpb.ConnectionService.UpdateConnection
//                                  ⇒ gateway/handlers/bqconnection/server.go: UpdateConnection
//                                  ⇒ gateway/handlers/bqconnection/connection_mask_paths.go: applyConnectionUpdateMask
//                                  ⇒ gateway/handlers/bqconnection/connection_update.go: per-field setters
//
// Storage adapter shim (deferred): connection-record helpers
// (GetConnectionRecord, PutConnectionRecord, ListConnectionRecords,
// DeleteConnectionRecord, IsNotFound) map onto
// this repo's `backend/storage/` once a connections table lands. The
// initial cut should keep them in-process (a `sync.Map`-backed store
// is fine for the live-IT track) and add a SQLite-backed
// implementation only when persistence becomes necessary.
package bqconnection

import (
        "net/http"
)

// Register is the symbolic entry point the gateway will call once the
// gRPC surface lands. Until then the gateway routes the few REST
// shapes the Java client falls back to (POST /v1beta1/projects/...
// and equivalent gapic-rest paths) to NotImplementedHTTP below.
//
// The function body is empty: it installs no routes on the mux it is given.
func Register(_ *http.ServeMux) {}

// NotImplementedHTTP answers any Connection API REST probe with HTTP 501 and
// a fixed JSON error body. The request is not inspected. The package keeps
// its own helper instead of depending on gateway/handlers.NotImplemented so
// that it stays self-contained.
func NotImplementedHTTP(w http.ResponseWriter, _ *http.Request) {
        const payload = `{"error":{"code":501,"message":"BigQuery Connection API is not yet implemented by the emulator. See docs/ENGINE_POLICY.md and ROADMAP.md.","status":"notImplemented","errors":[{"reason":"notImplemented","message":"BigQuery Connection API is not yet implemented by the emulator.","domain":"global"}]}}`

        // Headers must be set before WriteHeader sends the status line.
        header := w.Header()
        header.Set("Content-Type", "application/json; charset=utf-8")
        w.WriteHeader(http.StatusNotImplemented)

        // Best effort: there is nothing useful to do if the client went away.
        _, _ = w.Write([]byte(payload))
}

package bqconnection

import (
        "encoding/json"
        "errors"
        "os"
        "path/filepath"
        "sync"

        "cloud.google.com/go/bigquery/connection/apiv1/connectionpb"
        "github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
        "google.golang.org/protobuf/encoding/protojson"
)

// registryDir is the subdirectory of the connection fixture root that holds
// the persisted connection registry file.
const registryDir = "_registry"

// Store persists Connection records under $data_dir/external/connections/_registry/.
// A Store with an empty path keeps records in memory only.
type Store struct {
        path   string       // registry file location; "" disables persistence
        mu     sync.RWMutex // guards byName
        byName map[string]*connectionpb.Connection // records keyed by resource name
}

// OpenStore returns the connection registry for cfg. When cfg is nil or
// reports no connection fixture root, the store is memory-only. Otherwise
// the registry directory is created under that root if needed and any
// existing connections.json in it is loaded.
func OpenStore(cfg *sourceconfig.Config) (*Store, error) {
        var root string
        if cfg != nil {
                root = cfg.ConnectionFixtureRoot()
        }

        store := &Store{byName: map[string]*connectionpb.Connection{}}
        if root == "" {
                return store, nil
        }

        dir := filepath.Join(root, registryDir)
        if mkErr := os.MkdirAll(dir, 0o750); mkErr != nil {
                return nil, mkErr
        }
        store.path = filepath.Join(dir, "connections.json")
        if loadErr := store.load(); loadErr != nil {
                return nil, loadErr
        }
        return store, nil
}

// load fills s.byName from the registry file at s.path. It is a no-op for a
// nil or memory-only store and when the file does not exist yet.
//
// The file is a JSON envelope {"connections": [...]} whose elements are
// protojson-encoded Connection messages. protojson is required because
// Connection carries a oneof (Properties): encoding/json can write that Go
// interface field but cannot read it back, so a registry holding any
// connection with properties would fail to decode on the next start.
//
// Records written by the earlier encoding/json format are still accepted:
// protojson understands their snake_case field names, and the unknown
// "Properties" key is discarded (those records load without properties).
//
// A file that cannot be decoded is renamed to "<path>.corrupt" and the store
// starts empty; the decode error is only returned when that rename fails.
func (s *Store) load() error {
        if s == nil || s.path == "" {
                return nil
        }
        raw, err := os.ReadFile(s.path) //nolint:gosec // operator-controlled data dir
        if err != nil {
                if errors.Is(err, os.ErrNotExist) {
                        return nil
                }
                return err
        }

        // quarantine moves the unreadable file aside so the next persist does
        // not silently overwrite evidence of the corruption.
        quarantine := func(cause error) error {
                if renameErr := os.Rename(s.path, s.path+".corrupt"); renameErr != nil {
                        return cause
                }
                return nil
        }

        var envelope struct {
                Connections []json.RawMessage `json:"connections"`
        }
        if err := json.Unmarshal(raw, &envelope); err != nil {
                return quarantine(err)
        }

        // Decode into a scratch map first so a bad record cannot leave the
        // store half-populated.
        decoder := protojson.UnmarshalOptions{DiscardUnknown: true}
        loaded := make(map[string]*connectionpb.Connection, len(envelope.Connections))
        for _, item := range envelope.Connections {
                if len(item) == 0 || string(item) == "null" {
                        continue
                }
                c := &connectionpb.Connection{}
                if err := decoder.Unmarshal(item, c); err != nil {
                        return quarantine(err)
                }
                if c.Name == "" {
                        continue
                }
                loaded[c.Name] = c
        }
        for name, c := range loaded {
                s.byName[name] = c
        }
        return nil
}

// persist writes every record in s.byName to the registry file at s.path.
// It is a no-op for a nil or memory-only store. Each record is encoded with
// protojson (see load for why) while the read lock is held, and the file is
// replaced by writing "<path>.tmp" and renaming it over s.path.
func (s *Store) persist() error {
        if s == nil || s.path == "" {
                return nil
        }

        s.mu.RLock()
        items := make([]json.RawMessage, 0, len(s.byName))
        var encodeErr error
        for _, c := range s.byName {
                if c == nil {
                        continue
                }
                encoded, err := protojson.Marshal(c)
                if err != nil {
                        encodeErr = err
                        break
                }
                items = append(items, encoded)
        }
        s.mu.RUnlock()
        if encodeErr != nil {
                return encodeErr
        }

        raw, err := json.MarshalIndent(struct {
                Connections []json.RawMessage `json:"connections"`
        }{Connections: items}, "", "  ")
        if err != nil {
                return err
        }
        tmp := s.path + ".tmp"
        if err := os.WriteFile(tmp, raw, 0o600); err != nil {
                return err
        }
        return os.Rename(tmp, s.path)
}

// Put inserts conn under conn.Name, replacing any record already stored
// there, and then persists the registry. It returns an error when the store
// or conn is nil, when the name is empty, or when persisting fails.
func (s *Store) Put(conn *connectionpb.Connection) error {
        switch {
        case s == nil, conn == nil, conn.Name == "":
                return errors.New("connection name is required")
        }
        key := conn.Name
        s.mu.Lock()
        s.byName[key] = conn
        s.mu.Unlock()
        return s.persist()
}

// Get looks up a connection by resource name. The boolean reports whether a
// record was found; a nil store never finds anything.
func (s *Store) Get(name string) (*connectionpb.Connection, bool) {
        if s == nil {
                return nil, false
        }
        s.mu.RLock()
        conn, found := s.byName[name]
        s.mu.RUnlock()
        return conn, found
}

// Delete drops the record stored under name, if any, and then persists the
// registry. It returns an error when the store is nil, when name is empty,
// or when persisting fails.
func (s *Store) Delete(name string) error {
        switch {
        case s == nil, name == "":
                return errors.New("connection name is required")
        }
        s.mu.Lock()
        delete(s.byName, name)
        s.mu.Unlock()
        return s.persist()
}

// List returns the stored connections whose name begins with
// "<parent>/connections/". An empty parent selects every connection. The
// result is a non-nil slice in map-iteration order; a nil store yields nil.
func (s *Store) List(parent string) []*connectionpb.Connection {
        if s == nil {
                return nil
        }
        prefix := parent + "/connections/"
        matchAll := parent == ""

        s.mu.RLock()
        defer s.mu.RUnlock()

        matches := make([]*connectionpb.Connection, 0)
        for name, conn := range s.byName {
                if conn != nil && (matchAll || hasParentPrefix(name, prefix)) {
                        matches = append(matches, conn)
                }
        }
        return matches
}

// hasParentPrefix reports whether name starts with prefix and has at least
// one more byte after it; a name equal to the prefix does not qualify.
func hasParentPrefix(name, prefix string) bool {
        if len(name) <= len(prefix) {
                return false
        }
        return name[:len(prefix)] == prefix
}

// CloneConnection returns an independent copy of in, produced by a protojson
// marshal/unmarshal round trip, so the copy can be mutated safely. A nil
// input yields an empty Connection.
func CloneConnection(in *connectionpb.Connection) (*connectionpb.Connection, error) {
        clone := &connectionpb.Connection{}
        if in == nil {
                return clone, nil
        }
        encoded, err := protojson.Marshal(in)
        if err != nil {
                return nil, err
        }
        if err = protojson.Unmarshal(encoded, clone); err != nil {
                return nil, err
        }
        return clone, nil
}

package bqreservation

import (
        "context"

        "cloud.google.com/go/bigquery/reservation/apiv1/reservationpb"
        "google.golang.org/grpc"
)

// Server implements the shallow BigQuery Reservation API gRPC surface. It
// holds no state; RPCs that are not overridden fall through to the embedded
// UnimplementedReservationServiceServer.
type Server struct {
        reservationpb.UnimplementedReservationServiceServer
}

// RegisterGRPC registers a ReservationService implementation on srv. A nil
// registrar is ignored.
func RegisterGRPC(srv grpc.ServiceRegistrar) {
        if srv != nil {
                reservationpb.RegisterReservationServiceServer(srv, &Server{})
        }
}

// ListCapacityCommitments always succeeds with an empty, non-nil list of
// capacity commitments; the request is not inspected.
func (s *Server) ListCapacityCommitments(
        _ context.Context,
        _ *reservationpb.ListCapacityCommitmentsRequest,
) (*reservationpb.ListCapacityCommitmentsResponse, error) {
        resp := &reservationpb.ListCapacityCommitmentsResponse{}
        resp.CapacityCommitments = []*reservationpb.CapacityCommitment{}
        return resp, nil
}

// ListReservations always succeeds with an empty, non-nil list of
// reservations; the request is not inspected.
func (s *Server) ListReservations(
        _ context.Context,
        _ *reservationpb.ListReservationsRequest,
) (*reservationpb.ListReservationsResponse, error) {
        resp := &reservationpb.ListReservationsResponse{}
        resp.Reservations = []*reservationpb.Reservation{}
        return resp, nil
}

package bqstorage

import (
        "errors"
        "fmt"
        "strconv"
        "strings"
        "time"

        "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
        "github.com/apache/arrow/go/v18/arrow"
        "github.com/apache/arrow/go/v18/arrow/array"
        "github.com/apache/arrow/go/v18/arrow/memory"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// arrowSchemaFromEngine builds the Arrow schema matching an engine table
// schema: one Arrow field per engine field, typed via arrowTypeForBQ and
// nullable unless the field's mode is REQUIRED (compared case-insensitively).
// A nil or field-less schema yields an empty Arrow schema.
func arrowSchemaFromEngine(schema *enginepb.TableSchema) *arrow.Schema {
        if schema == nil {
                return arrow.NewSchema(nil, nil)
        }
        engineFields := schema.GetFields()
        if len(engineFields) == 0 {
                return arrow.NewSchema(nil, nil)
        }
        arrowFields := make([]arrow.Field, len(engineFields))
        for i, ef := range engineFields {
                required := strings.ToUpper(ef.GetMode()) == bqModeRequired
                arrowFields[i] = arrow.Field{
                        Name:     ef.GetName(),
                        Type:     arrowTypeForBQ(ef.GetType()),
                        Nullable: !required,
                }
        }
        return arrow.NewSchema(arrowFields, nil)
}

// arrowTypeForBQ maps a BigQuery type name (trimmed, case-insensitive) to the
// Arrow type used on the wire. Integers, floats and booleans map to their
// native Arrow types; TIMESTAMP is a microsecond timestamp in UTC and
// DATETIME a microsecond timestamp without a zone. Every other type,
// including unknown ones, is carried as an Arrow string.
func arrowTypeForBQ(t string) arrow.DataType {
        normalized := strings.ToUpper(strings.TrimSpace(t))
        switch normalized {
        case bqTypeINT64, bqTypeINTEGER:
                return arrow.PrimitiveTypes.Int64
        case bqTypeFLOAT64, bqTypeFLOAT:
                return arrow.PrimitiveTypes.Float64
        case bqTypeBOOL:
                return arrow.FixedWidthTypes.Boolean
        case bqTypeTIMESTAMP:
                return &arrow.TimestampType{Unit: arrow.Microsecond, TimeZone: "UTC"}
        case bqTypeDATETIME:
                return &arrow.TimestampType{Unit: arrow.Microsecond}
        case bqTypeSTRING, bqTypeJSON, bqTypeGEOGRAPHY, bqTypeDATE, bqTypeTIME,
                bqTypeBYTES, bqTypeNUMERIC, bqTypeBIGNUMERIC, bqTypeSTRUCT, bqTypeRECORD:
                // Listed explicitly to document the types knowingly sent as text.
                return arrow.BinaryTypes.String
        }
        return arrow.BinaryTypes.String
}

// serializeArrowSchema converts the engine schema to Arrow and returns it as
// a storagepb.ArrowSchema holding the bytes from serializeArrowIPCSchema.
func serializeArrowSchema(schema *enginepb.TableSchema) (*storagepb.ArrowSchema, error) {
        encoded, err := serializeArrowIPCSchema(arrowSchemaFromEngine(schema))
        if err != nil {
                return nil, err
        }
        out := &storagepb.ArrowSchema{}
        out.SerializedSchema = encoded
        return out, nil
}

// rowsToArrowBatch encodes rows as a single Arrow record batch laid out per
// schema. Columns are filled one at a time through appendColumnValues; the
// first cell that fails to parse aborts the conversion. RowCount in the
// result is len(rows).
func rowsToArrowBatch(
        schema *enginepb.TableSchema,
        rows []*enginepb.DataRow,
) (*storagepb.ArrowRecordBatch, error) {
        arrowSchema := arrowSchemaFromEngine(schema)
        recordBuilder := array.NewRecordBuilder(memory.NewGoAllocator(), arrowSchema)
        defer recordBuilder.Release()

        for i, f := range schema.GetFields() {
                err := appendColumnValues(recordBuilder.Field(i), f.GetType(), rows, i)
                if err != nil {
                        return nil, err
                }
        }

        record := recordBuilder.NewRecord()
        defer record.Release()

        payload, err := serializeArrowIPCRecordBatch(arrowSchema, record)
        if err != nil {
                return nil, err
        }
        batch := &storagepb.ArrowRecordBatch{
                SerializedRecordBatch: payload,
                RowCount:              int64(len(rows)),
        }
        return batch, nil
}

// appendColumnValues appends column colIdx of every row to builder, choosing
// the typed appender from the BigQuery type name (trimmed,
// case-insensitive). Types without a dedicated appender are appended as
// strings, mirroring the fallback in arrowTypeForBQ.
func appendColumnValues(
        builder array.Builder,
        bqType string,
        rows []*enginepb.DataRow,
        colIdx int,
) error {
        normalized := strings.ToUpper(strings.TrimSpace(bqType))
        switch normalized {
        case bqTypeINT64, bqTypeINTEGER:
                return appendInt64Column(builder, rows, colIdx)
        case bqTypeFLOAT64, bqTypeFLOAT:
                return appendFloat64Column(builder, rows, colIdx)
        case bqTypeBOOL:
                return appendBoolColumn(builder, rows, colIdx)
        case bqTypeTIMESTAMP:
                return appendTimestampColumn(builder, rows, colIdx)
        case bqTypeDATETIME:
                return appendDatetimeColumn(builder, rows, colIdx)
        }
        return appendStringColumn(builder, rows, colIdx)
}

// appendInt64Column appends column colIdx of each row to builder, which must
// be an *array.Int64Builder. A row that is too short or whose cell is flagged
// null contributes a null; any other cell is parsed as a base-10 int64 and a
// parse failure aborts with an error.
func appendInt64Column(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
        ints := builder.(*array.Int64Builder)
        for _, row := range rows {
                cells := row.GetCells()
                if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
                        ints.AppendNull()
                        continue
                }
                parsed, err := strconv.ParseInt(cells[colIdx].GetStringValue(), 10, 64)
                if err != nil {
                        return fmt.Errorf("column %d INT64 parse: %w", colIdx, err)
                }
                ints.Append(parsed)
        }
        return nil
}

// appendFloat64Column appends column colIdx of each row to builder, which
// must be an *array.Float64Builder. A row that is too short or whose cell is
// flagged null contributes a null; any other cell is parsed as a float64 and
// a parse failure aborts with an error.
func appendFloat64Column(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
        floats := builder.(*array.Float64Builder)
        for _, row := range rows {
                cells := row.GetCells()
                if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
                        floats.AppendNull()
                        continue
                }
                parsed, err := strconv.ParseFloat(cells[colIdx].GetStringValue(), 64)
                if err != nil {
                        return fmt.Errorf("column %d FLOAT64 parse: %w", colIdx, err)
                }
                floats.Append(parsed)
        }
        return nil
}

// appendBoolColumn appends column colIdx of each row to builder, which must
// be an *array.BooleanBuilder. A row that is too short or whose cell is
// flagged null contributes a null; any other cell is parsed with
// strconv.ParseBool and a parse failure aborts with an error.
func appendBoolColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
        bools := builder.(*array.BooleanBuilder)
        for _, row := range rows {
                cells := row.GetCells()
                if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
                        bools.AppendNull()
                        continue
                }
                parsed, err := strconv.ParseBool(cells[colIdx].GetStringValue())
                if err != nil {
                        return fmt.Errorf("column %d BOOL parse: %w", colIdx, err)
                }
                bools.Append(parsed)
        }
        return nil
}

// appendTimestampColumn appends column colIdx of each row to builder, which
// must be an *array.TimestampBuilder. A row that is too short or whose cell
// is flagged null contributes a null; any other cell is converted with
// timestampCellToMicros and a conversion failure aborts with an error.
func appendTimestampColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
        stamps := builder.(*array.TimestampBuilder)
        for _, row := range rows {
                cells := row.GetCells()
                if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
                        stamps.AppendNull()
                        continue
                }
                value, err := timestampCellToMicros(cells[colIdx].GetStringValue())
                if err != nil {
                        return fmt.Errorf("column %d TIMESTAMP parse: %w", colIdx, err)
                }
                stamps.Append(arrow.Timestamp(value))
        }
        return nil
}

// appendDatetimeColumn appends column colIdx of each row to builder, which
// must be an *array.TimestampBuilder. A row that is too short or whose cell
// is flagged null contributes a null; any other cell is converted with
// datetimeCellToMicros and a conversion failure aborts with an error.
func appendDatetimeColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
        stamps := builder.(*array.TimestampBuilder)
        for _, row := range rows {
                cells := row.GetCells()
                if colIdx >= len(cells) || cells[colIdx].GetNullValue() {
                        stamps.AppendNull()
                        continue
                }
                value, err := datetimeCellToMicros(cells[colIdx].GetStringValue())
                if err != nil {
                        return fmt.Errorf("column %d DATETIME parse: %w", colIdx, err)
                }
                stamps.Append(arrow.Timestamp(value))
        }
        return nil
}

// appendStringColumn appends one string value per row to builder, which must
// be an *array.StringBuilder (any other builder type panics on the assertion).
//
// A row that has no cell at colIdx, or whose cell is flagged null, contributes
// a null; otherwise the cell's string value is appended unchanged. The
// returned error is always nil.
func appendStringColumn(builder array.Builder, rows []*enginepb.DataRow, colIdx int) error {
        strBuilder := builder.(*array.StringBuilder)
        for _, row := range rows {
                cells := row.GetCells()
                if colIdx < len(cells) && !cells[colIdx].GetNullValue() {
                        strBuilder.Append(cells[colIdx].GetStringValue())
                        continue
                }
                // Short row or explicit null marker.
                strBuilder.AppendNull()
        }
        return nil
}

// timestampCellToMicros converts a TIMESTAMP cell string to an int64.
//
// A blank (empty or whitespace-only) input is rejected up front. Otherwise the
// original, untrimmed string is handed to bqtypes.TimestampStringToMicros and
// the decimal string it returns is parsed as a base-10 int64.
func timestampCellToMicros(s string) (int64, error) {
        if len(strings.TrimSpace(s)) == 0 {
                return 0, errors.New("empty timestamp")
        }
        digits, convErr := bqtypes.TimestampStringToMicros(s)
        if convErr != nil {
                return 0, convErr
        }
        micros, parseErr := strconv.ParseInt(digits, 10, 64)
        return micros, parseErr
}

// datetimeCellToMicros parses a DATETIME cell of the form
// "YYYY-MM-DD hh:mm:ss" (optionally with fractional seconds, and optionally
// with a "T" in place of the space) and returns the number of microseconds
// between 1970-01-01 00:00:00 and that wall-clock value. time.Parse is given
// no zone, so the value is interpreted as UTC.
//
// Surrounding whitespace is ignored. A blank input yields an error, as does
// input that matches none of the accepted layouts (the error from the last
// layout tried is returned).
func datetimeCellToMicros(s string) (int64, error) {
        text := strings.TrimSpace(s)
        if text == "" {
                return 0, errors.New("empty datetime")
        }
        // Accept the ISO-style "T" separator by normalising its first occurrence.
        text = strings.Replace(text, "T", " ", 1)

        var parseErr error
        for _, layout := range [...]string{
                "2006-01-02 15:04:05.999999",
                "2006-01-02 15:04:05",
        } {
                parsed, err := time.Parse(layout, text)
                if err != nil {
                        parseErr = err
                        continue
                }
                return parsed.UnixMicro(), nil
        }
        return 0, parseErr
}

package bqstorage

import (
        "bytes"
        "errors"
        "fmt"
        "io"

        "github.com/apache/arrow/go/v18/arrow"
        "github.com/apache/arrow/go/v18/arrow/ipc"
)

// countingReader wraps an io.Reader and keeps a running total of the bytes
// that have been read through it.
type countingReader struct {
        r   io.Reader // wrapped source
        pos int64     // total bytes reported by Read so far
}

// Read forwards to the wrapped reader and adds the byte count it reports to
// pos, whether or not an error accompanies that count.
func (cr *countingReader) Read(buf []byte) (int, error) {
        read, err := cr.r.Read(buf)
        cr.pos += int64(read)
        return read, err
}

// ipcMessageAt returns the bytes of the index-th (zero-based) message found in
// data, which is read with an Arrow IPC message reader.
//
// The reader consumes data through a countingReader; the byte offsets observed
// before and after each Message call delimit that message, and the returned
// slice is the matching sub-slice of data (it aliases data, it is not a copy).
// Reaching end of input before the requested index yields an "out of range"
// error; any other read error is returned as is.
func ipcMessageAt(data []byte, index int) ([]byte, error) {
        counter := &countingReader{r: bytes.NewReader(data)}
        reader := ipc.NewMessageReader(counter)
        defer reader.Release()

        for current := 0; ; current++ {
                begin := counter.pos
                msg, err := reader.Message()
                switch {
                case err == nil:
                        // Message decoded; fall through to the offset bookkeeping.
                case errors.Is(err, io.EOF):
                        return nil, fmt.Errorf("arrow ipc: message index %d out of range", index)
                default:
                        return nil, err
                }
                finish := counter.pos
                msg.Release()

                if current == index {
                        return data[begin:finish], nil
                }
        }
}

// serializeArrowIPCSchema writes an IPC stream for schema as that contains no
// record batches and returns the first message of that stream (extracted with
// ipcMessageAt), which is expected to be the schema message.
func serializeArrowIPCSchema(as *arrow.Schema) ([]byte, error) {
        buf := new(bytes.Buffer)
        writer := ipc.NewWriter(buf, ipc.WithSchema(as))
        if closeErr := writer.Close(); closeErr != nil {
                return nil, closeErr
        }
        return ipcMessageAt(buf.Bytes(), 0)
}

// serializeArrowIPCRecordBatch writes rec into a fresh IPC stream created for
// schema as and returns only the record-batch message of that stream.
//
// The writer is always closed. A Write error takes precedence and the Close
// result is then deliberately discarded; otherwise a Close error is returned.
func serializeArrowIPCRecordBatch(as *arrow.Schema, rec arrow.Record) ([]byte, error) {
        buf := new(bytes.Buffer)
        writer := ipc.NewWriter(buf, ipc.WithSchema(as))

        writeErr := writer.Write(rec)
        closeErr := writer.Close()
        if writeErr != nil {
                return nil, writeErr
        }
        if closeErr != nil {
                return nil, closeErr
        }
        // Message 0 is the schema and message 1 the record batch; the
        // end-of-stream marker comes after it.
        return ipcMessageAt(buf.Bytes(), 1)
}

package bqstorage

import (
        "encoding/json"
        "errors"
        "fmt"
        "strconv"
        "strings"
        "time"

        "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
        goavro "github.com/linkedin/goavro/v2"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// avroRecordName is the name given to the top-level Avro record in the
// schemas generated by avroSchemaJSONFromEngine.
const avroRecordName = "root"

// serializeAvroSchema renders the engine table schema as an Avro schema JSON
// document and wraps it in the public storagepb.AvroSchema message. Any error
// from the JSON rendering is returned unchanged.
func serializeAvroSchema(schema *enginepb.TableSchema) (*storagepb.AvroSchema, error) {
        doc, err := avroSchemaJSONFromEngine(schema)
        if err == nil {
                return &storagepb.AvroSchema{Schema: doc}, nil
        }
        return nil, err
}

// avroSchemaJSONFromEngine builds the Avro schema JSON for an engine table
// schema: a single record named avroRecordName whose "fields" list holds one
// entry per engine field, in schema order, each produced by
// engineFieldToAvroField.
//
// The first field conversion error aborts the build; a JSON encoding failure
// is returned wrapped as "marshal Avro schema".
func avroSchemaJSONFromEngine(schema *enginepb.TableSchema) (string, error) {
        engineFields := schema.GetFields()
        avroFields := make([]map[string]any, len(engineFields))
        for i, engineField := range engineFields {
                converted, err := engineFieldToAvroField(engineField)
                if err != nil {
                        return "", err
                }
                avroFields[i] = converted
        }

        encoded, err := json.Marshal(map[string]any{
                avroKeyType: avroTypeRecord,
                avroKeyName: avroRecordName,
                "fields":    avroFields,
        })
        if err != nil {
                return "", fmt.Errorf("marshal Avro schema: %w", err)
        }
        return string(encoded), nil
}

// engineFieldToAvroField converts one engine field into an Avro field
// definition holding its name and type.
//
// The Avro type comes from bqTypeToAvroType. Unless the field's mode
// (compared case-insensitively) is REQUIRED, the type is wrapped in a
// ["null", T] union. A nil field is rejected with an error.
func engineFieldToAvroField(f *enginepb.FieldSchema) (map[string]any, error) {
        if f == nil {
                return nil, errors.New("nil field schema")
        }
        field := map[string]any{avroKeyName: f.GetName()}
        avroType := bqTypeToAvroType(f.GetType())
        if strings.ToUpper(f.GetMode()) == bqModeRequired {
                field[avroKeyType] = avroType
        } else {
                field[avroKeyType] = []any{"null", avroType}
        }
        return field, nil
}

func bqTypeToAvroType(t string) any {
        switch strings.ToUpper(strings.TrimSpace(t)) {
        case bqTypeBOOL:
                return "boolean"
        case bqTypeINT64, bqTypeINTEGER:
                return avroTypeLong
        case bqTypeFLOAT64, bqTypeFLOAT:
                return "double"
        case bqTypeBYTES:
                return avroTypeBytes
        case bqTypeSTRING:
                return avroTypeString
        case bqTypeDATE:
                return map[string]any{avroKeyType: "int", avroKeyLogicalType: "date"}
        case bqTypeDATETIME:
                return map[string]any{avroKeyType: avroTypeString, avroKeyLogicalType: "datetime"}
        case bqTypeTIMESTAMP:
                return map[string]any{avroKeyType: avroTypeLong, avroKeyLogicalType: "timestamp-micros"}
        case bqTypeTIME:
                return map[string]any{avroKeyType: avroTypeLong, avroKeyLogicalType: "time-micros"}
        case bqTypeNUMERIC:
                return map[string]any{
                        avroKeyType:        avroTypeBytes,
                        avroKeyLogicalType: "decimal",
                        "precision":        38,
                        "scale":            9,
                }
        case bqTypeBIGNUMERIC:
                return map[string]any{
                        avroKeyType:        avroTypeBytes,
                        avroKeyLogicalType: "decimal",
                        "precision":        77,
                        "scale":            38,
                }
        case bqTypeGEOGRAPHY:
                return map[string]any{avroKeyType: avroTypeString, "sqlType": bqTypeGEOGRAPHY}
        case bqTypeJSON:
                return map[string]any{avroKeyType: avroTypeString, "sqlType": bqTypeJSON}
        case bqTypeSTRUCT, bqTypeRECORD:
                // Nested structs are lowered to string cells in the engine shim today.
                return avroTypeString
        default:
                return avroTypeString
        }
}

// rowsToAvroBatch encodes rows as Avro binary using the Avro schema derived
// from schema, and returns them as a storagepb.AvroRows whose
// SerializedBinaryRows is the concatenation of the per-row encodings (in
// input order) and whose RowCount is len(rows).
//
// The schema JSON and codec are rebuilt on every call. The first row that
// fails conversion or encoding aborts the batch with an error.
func rowsToAvroBatch(
        schema *enginepb.TableSchema,
        rows []*enginepb.DataRow,
) (*storagepb.AvroRows, error) {
        schemaDoc, err := avroSchemaJSONFromEngine(schema)
        if err != nil {
                return nil, err
        }
        codec, err := goavro.NewCodec(schemaDoc)
        if err != nil {
                return nil, fmt.Errorf("create Avro codec: %w", err)
        }

        batch := &storagepb.AvroRows{RowCount: int64(len(rows))}
        for _, row := range rows {
                datum, err := engineRowToAvroNative(schema, row)
                if err != nil {
                        return nil, err
                }
                encoded, err := codec.BinaryFromNative(nil, datum)
                if err != nil {
                        return nil, fmt.Errorf("encode Avro row: %w", err)
                }
                batch.SerializedBinaryRows = append(batch.SerializedBinaryRows, encoded...)
        }
        return batch, nil
}

// engineRowToAvroNative converts one engine row into a map keyed by field
// name, with each value produced by cellToAvroNative.
//
// Cells are matched to schema fields by position; a field with no cell at its
// position (the row is shorter than the schema) is converted from a nil cell.
// A conversion error is returned wrapped with the quoted column name.
func engineRowToAvroNative(
        schema *enginepb.TableSchema,
        row *enginepb.DataRow,
) (map[string]any, error) {
        fields := schema.GetFields()
        cells := row.GetCells()
        record := make(map[string]any, len(fields))
        for i, field := range fields {
                var cell *enginepb.Cell
                if i < len(cells) {
                        cell = cells[i]
                }
                native, err := cellToAvroNative(field, cell)
                if err != nil {
                        return nil, fmt.Errorf("column %q: %w", field.GetName(), err)
                }
                record[field.GetName()] = native
        }
        return record, nil
}

// cellToAvroNative converts one engine cell into the native Go value handed
// to the Avro codec for the given field.
//
// Null handling: a nil cell or a cell flagged null yields (nil, nil) when the
// field is nullable (mode is anything other than REQUIRED, compared
// case-insensitively) and an error when the field is required.
//
// Value handling, by field type (case-insensitive, whitespace ignored):
//   - BOOL, INT64/INTEGER, FLOAT64/FLOAT, TIMESTAMP and DATE are parsed from
//     the whitespace-trimmed cell string.
//   - STRING and BYTES carry the cell string through exactly as stored.
//     Leading and trailing whitespace is user data for these types, and the
//     Arrow string path likewise appends the cell value without trimming.
//   - Every other type is passed on as the whitespace-trimmed cell string.
//
// For nullable fields the value is wrapped by unionNative so that it selects
// the non-null branch of the field's union.
func cellToAvroNative(field *enginepb.FieldSchema, cell *enginepb.Cell) (any, error) {
        nullable := strings.ToUpper(field.GetMode()) != bqModeRequired
        if cell == nil || cell.GetNullValue() {
                if nullable {
                        return nil, nil
                }
                return nil, errors.New("required column is null")
        }

        exact := cell.GetStringValue()
        raw := strings.TrimSpace(exact)
        typ := strings.ToUpper(strings.TrimSpace(field.GetType()))
        var (
                val any
                err error
        )
        switch typ {
        case bqTypeBOOL:
                val, err = strconv.ParseBool(raw)
        case bqTypeINT64, bqTypeINTEGER:
                val, err = strconv.ParseInt(raw, 10, 64)
        case bqTypeFLOAT64, bqTypeFLOAT:
                val, err = strconv.ParseFloat(raw, 64)
        case bqTypeTIMESTAMP:
                val, err = timestampCellToMicros(raw)
        case bqTypeDATE:
                val, err = dateStringToDays(raw)
        case bqTypeSTRING:
                // Do not trim: surrounding whitespace is part of the value.
                val = exact
        case bqTypeBYTES:
                // Do not trim: surrounding whitespace bytes are part of the value.
                val = []byte(exact)
        default:
                val = raw
        }
        if err != nil {
                return nil, err
        }
        if nullable {
                return unionNative(typ, val), nil
        }
        return val, nil
}

// dateStringToDays parses a "YYYY-MM-DD" date (surrounding whitespace is
// ignored) and returns the number of days between 1970-01-01 and that date;
// dates before the epoch are negative.
//
// The day count is derived from the Unix timestamp rather than from
// time.Time.Sub: Sub returns a time.Duration, which saturates at roughly
// ±292 years and would give wrong results for dates far from 1970 (for
// example 9999-12-31 or 0001-01-01). A date-only layout parses to midnight
// UTC, so the Unix timestamp is an exact multiple of one day and integer
// division is exact for negative values too.
func dateStringToDays(s string) (int32, error) {
        t, err := time.Parse("2006-01-02", strings.TrimSpace(s))
        if err != nil {
                return 0, err
        }
        const secondsPerDay = 24 * 60 * 60
        return int32(t.Unix() / secondsPerDay), nil
}

// unionNative wraps val in a single-entry map whose key names the Avro union
// branch chosen for the BigQuery type bqType (case-insensitive, whitespace
// ignored). Types with no dedicated branch here use the string branch.
func unionNative(bqType string, val any) map[string]any {
        var branch string
        switch strings.ToUpper(strings.TrimSpace(bqType)) {
        case bqTypeBOOL:
                branch = "boolean"
        case bqTypeINT64, bqTypeINTEGER, bqTypeTIMESTAMP:
                branch = avroTypeLong
        case bqTypeFLOAT64, bqTypeFLOAT:
                branch = "double"
        case bqTypeBYTES, bqTypeNUMERIC, bqTypeBIGNUMERIC:
                branch = avroTypeBytes
        case bqTypeDATE:
                branch = "int"
        default:
                branch = avroTypeString
        }
        return map[string]any{branch: val}
}

package bqstorage

import (
        "strings"
        "time"

        "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "google.golang.org/protobuf/types/known/timestamppb"
)

// engineCreateReadSessionRequest translates a public CreateReadSessionRequest
// into the engine's request type. A nil input yields nil.
//
// Parent and MaxStreamCount are always copied. When a read session is
// present its table is copied, and when that session has read options the
// selected fields (as a fresh slice) and the row restriction are copied too.
func engineCreateReadSessionRequest(
        in *storagepb.CreateReadSessionRequest,
) *enginepb.CreateReadSessionRequest {
        if in == nil {
                return nil
        }
        req := &enginepb.CreateReadSessionRequest{
                Parent:         in.GetParent(),
                MaxStreamCount: in.GetMaxStreamCount(),
        }

        session := in.GetReadSession()
        if session == nil {
                return req
        }
        req.ReadSession = &enginepb.ReadSession{Table: session.GetTable()}

        readOpts := session.GetReadOptions()
        if readOpts == nil {
                return req
        }
        // Copy so the engine request does not alias the caller's slice.
        selected := append([]string(nil), readOpts.GetSelectedFields()...)
        req.ReadSession.ReadOptions = &enginepb.ReadOptions{
                SelectedFields: selected,
                RowRestriction: readOpts.GetRowRestriction(),
        }
        return req
}

// publicReadSessionFromEngine converts an engine read session into the public
// storagepb.ReadSession, attaching the schema in the requested data format.
// A nil input yields (nil, nil).
//
// Name, table, read options (selected fields copied) and stream names are
// carried over. Schema handling depends on dataFormat:
//   - AVRO:  Avro schema; a serialization error is returned.
//   - ARROW: Arrow schema; a serialization error is returned.
//   - other: reported as ARROW; the Arrow schema is attached only if it
//     serializes successfully, and a failure is silently ignored.
func publicReadSessionFromEngine(
        in *enginepb.ReadSession,
        dataFormat storagepb.DataFormat,
) (*storagepb.ReadSession, error) {
        if in == nil {
                return nil, nil
        }
        session := &storagepb.ReadSession{
                Name:  in.GetName(),
                Table: in.GetTable(),
        }
        if readOpts := in.GetReadOptions(); readOpts != nil {
                session.ReadOptions = &storagepb.ReadSession_TableReadOptions{
                        SelectedFields: append([]string(nil), readOpts.GetSelectedFields()...),
                        RowRestriction: readOpts.GetRowRestriction(),
                }
        }
        for _, engineStream := range in.GetStreams() {
                session.Streams = append(session.Streams, &storagepb.ReadStream{Name: engineStream.GetName()})
        }

        if dataFormat == storagepb.DataFormat_AVRO {
                avroSchema, err := serializeAvroSchema(in.GetSchema())
                if err != nil {
                        return nil, err
                }
                session.Schema = &storagepb.ReadSession_AvroSchema{AvroSchema: avroSchema}
                session.DataFormat = storagepb.DataFormat_AVRO
                return session, nil
        }

        // Explicit ARROW and every unrecognised format are both served as Arrow;
        // they differ only in whether a schema failure is fatal.
        arrowSchema, err := serializeArrowSchema(in.GetSchema())
        switch {
        case err == nil:
                session.Schema = &storagepb.ReadSession_ArrowSchema{ArrowSchema: arrowSchema}
        case dataFormat == storagepb.DataFormat_ARROW:
                return nil, err
        }
        session.DataFormat = storagepb.DataFormat_ARROW
        return session, nil
}

// engineTableSchemaToPublic converts an engine table schema into the public
// storagepb.TableSchema, converting each field with engineFieldToPublic in
// order. A nil input yields nil.
func engineTableSchemaToPublic(in *enginepb.TableSchema) *storagepb.TableSchema {
        if in == nil {
                return nil
        }
        public := new(storagepb.TableSchema)
        for _, engineField := range in.GetFields() {
                public.Fields = append(public.Fields, engineFieldToPublic(engineField))
        }
        return public
}

// engineFieldToPublic converts one engine field schema into the public
// storagepb.TableFieldSchema: name and description are copied, while type and
// mode are translated by engineTypeToPublic and engineModeToPublic. A nil
// input yields nil.
func engineFieldToPublic(f *enginepb.FieldSchema) *storagepb.TableFieldSchema {
        if f == nil {
                return nil
        }
        public := new(storagepb.TableFieldSchema)
        public.Name = f.GetName()
        public.Type = engineTypeToPublic(f.GetType())
        public.Mode = engineModeToPublic(f.GetMode())
        public.Description = f.GetDescription()
        return public
}

// engineTypeToPublic maps an engine type name (matched case-insensitively,
// surrounding whitespace ignored) to the public TableFieldSchema type enum.
//
// The INTEGER and FLOAT aliases are accepted alongside INT64 and FLOAT64 so
// that this mapping agrees with the Avro type mapping in this package, which
// already treats each pair as equivalent; without them an INTEGER or FLOAT
// column would be reported as STRING. Unrecognised names map to STRING.
func engineTypeToPublic(t string) storagepb.TableFieldSchema_Type {
        switch strings.ToUpper(strings.TrimSpace(t)) {
        case bqTypeSTRING:
                return storagepb.TableFieldSchema_STRING
        case bqTypeBYTES:
                return storagepb.TableFieldSchema_BYTES
        case bqTypeINT64, bqTypeINTEGER:
                return storagepb.TableFieldSchema_INT64
        case bqTypeFLOAT64, bqTypeFLOAT:
                return storagepb.TableFieldSchema_DOUBLE
        case bqTypeBOOL:
                return storagepb.TableFieldSchema_BOOL
        case bqTypeTIMESTAMP:
                return storagepb.TableFieldSchema_TIMESTAMP
        case bqTypeDATE:
                return storagepb.TableFieldSchema_DATE
        case bqTypeTIME:
                return storagepb.TableFieldSchema_TIME
        case bqTypeDATETIME:
                return storagepb.TableFieldSchema_DATETIME
        case bqTypeNUMERIC:
                return storagepb.TableFieldSchema_NUMERIC
        case bqTypeBIGNUMERIC:
                return storagepb.TableFieldSchema_BIGNUMERIC
        case bqTypeJSON:
                return storagepb.TableFieldSchema_JSON
        case bqTypeGEOGRAPHY:
                return storagepb.TableFieldSchema_GEOGRAPHY
        case bqTypeSTRUCT, bqTypeRECORD:
                return storagepb.TableFieldSchema_STRUCT
        default:
                return storagepb.TableFieldSchema_STRING
        }
}

// engineModeToPublic maps an engine mode string (matched case-insensitively,
// surrounding whitespace ignored) to the public TableFieldSchema mode enum.
// Anything other than the required or repeated mode is reported as NULLABLE.
func engineModeToPublic(m string) storagepb.TableFieldSchema_Mode {
        mode := strings.ToUpper(strings.TrimSpace(m))
        if mode == bqModeRequired {
                return storagepb.TableFieldSchema_REQUIRED
        }
        if mode == bqModeRepeated {
                return storagepb.TableFieldSchema_REPEATED
        }
        return storagepb.TableFieldSchema_NULLABLE
}

// publicWriteTypeToEngine maps a public write stream type to the engine's
// enum. PENDING and BUFFERED map to their counterparts; every other value,
// including an unspecified type, maps to COMMITTED.
func publicWriteTypeToEngine(t storagepb.WriteStream_Type) enginepb.WriteStream_Type {
        if t == storagepb.WriteStream_PENDING {
                return enginepb.WriteStream_PENDING
        }
        if t == storagepb.WriteStream_BUFFERED {
                return enginepb.WriteStream_BUFFERED
        }
        return enginepb.WriteStream_COMMITTED
}

// engineWriteTypeToPublic maps an engine write stream type to the public
// enum. COMMITTED, PENDING and BUFFERED map to their counterparts; any other
// value is reported as TYPE_UNSPECIFIED.
func engineWriteTypeToPublic(t enginepb.WriteStream_Type) storagepb.WriteStream_Type {
        public := storagepb.WriteStream_TYPE_UNSPECIFIED
        switch t {
        case enginepb.WriteStream_COMMITTED:
                public = storagepb.WriteStream_COMMITTED
        case enginepb.WriteStream_PENDING:
                public = storagepb.WriteStream_PENDING
        case enginepb.WriteStream_BUFFERED:
                public = storagepb.WriteStream_BUFFERED
        }
        return public
}

// publicWriteStreamFromEngine converts an engine write stream into the public
// storagepb.WriteStream, carrying over the name, the translated type and the
// translated table schema. A nil input yields nil.
//
// CreateTime is set only when the engine's create-time string is non-empty
// and parses as RFC 3339; an unparseable value is ignored and the field is
// left unset.
func publicWriteStreamFromEngine(in *enginepb.WriteStream) *storagepb.WriteStream {
        if in == nil {
                return nil
        }
        public := &storagepb.WriteStream{
                Name:        in.GetName(),
                Type:        engineWriteTypeToPublic(in.GetType()),
                TableSchema: engineTableSchemaToPublic(in.GetSchema()),
        }
        created := in.GetCreateTime()
        if created == "" {
                return public
        }
        // Best effort: a malformed timestamp simply leaves CreateTime unset.
        if parsed, err := time.Parse(time.RFC3339, created); err == nil {
                public.CreateTime = timestamppb.New(parsed)
        }
        return public
}

// engineWriteStreamFromPublic builds the engine write stream for a public
// one. Only the stream type is carried over (translated by
// publicWriteTypeToEngine); a nil input yields nil.
func engineWriteStreamFromPublic(in *storagepb.WriteStream) *enginepb.WriteStream {
        if in == nil {
                return nil
        }
        engineStream := new(enginepb.WriteStream)
        engineStream.Type = publicWriteTypeToEngine(in.GetType())
        return engineStream
}

package bqstorage

import "math"

// uint64ToSignedInt64 converts v to int64, clamping values that do not fit
// to math.MaxInt64 instead of letting them wrap to a negative number.
func uint64ToSignedInt64(v uint64) int64 {
        if v <= math.MaxInt64 {
                return int64(v)
        }
        return math.MaxInt64
}

package bqstorage

import (
        "context"
        "errors"
        "fmt"
        "strconv"
        "strings"

        "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "google.golang.org/protobuf/proto"
        "google.golang.org/protobuf/reflect/protodesc"
        "google.golang.org/protobuf/reflect/protoreflect"
        "google.golang.org/protobuf/types/descriptorpb"
        "google.golang.org/protobuf/types/dynamicpb"
)

// protoDataToEngineRows decodes the serialized proto rows of an AppendRows
// request into engine data rows, one per serialized row and in the same
// order.
//
// Nil data, or data with no serialized rows, yields (nil, nil) without
// resolving a descriptor. Otherwise the row descriptor is obtained through
// resolveProtoDescriptor (which may read or update *cachedDesc), each payload
// is unmarshalled into a dynamic message of that type, and the message is
// converted with dynamicMessageToDataRow. Errors for an individual row are
// wrapped with the row's index.
func protoDataToEngineRows(
        ctx context.Context,
        engineClient *engine.Client,
        writeStream string,
        data *storagepb.AppendRowsRequest_ProtoData,
        cachedDesc **descriptorpb.DescriptorProto,
) ([]*enginepb.DataRow, error) {
        if data == nil {
                return nil, nil
        }
        serialized := data.GetRows().GetSerializedRows()
        if len(serialized) == 0 {
                return nil, nil
        }

        descProto, err := resolveProtoDescriptor(ctx, engineClient, writeStream, data, cachedDesc)
        if err != nil {
                return nil, err
        }
        rowType, err := messageDescriptor(descProto)
        if err != nil {
                return nil, err
        }

        converted := make([]*enginepb.DataRow, len(serialized))
        for idx, payload := range serialized {
                dyn := dynamicpb.NewMessage(rowType)
                if err := proto.Unmarshal(payload, dyn); err != nil {
                        return nil, fmt.Errorf("row %d unmarshal: %w", idx, err)
                }
                row, err := dynamicMessageToDataRow(dyn)
                if err != nil {
                        return nil, fmt.Errorf("row %d decode: %w", idx, err)
                }
                converted[idx] = row
        }
        return converted, nil
}

// resolveProtoDescriptor determines the proto descriptor used to decode the
// rows in data, trying these sources in order:
//
//  1. the descriptor in data's writer schema (also stored into *cachedDesc);
//  2. the descriptor already stored in *cachedDesc;
//  3. a descriptor synthesised from the write stream's table schema, fetched
//     from the engine with GetWriteStream (also stored into *cachedDesc).
//
// cachedDesc must be non-nil; a nil pointer is rejected before any source is
// consulted. When no source yields a descriptor the error reports a missing
// writer_schema.proto_descriptor.
func resolveProtoDescriptor(
        ctx context.Context,
        engineClient *engine.Client,
        writeStream string,
        data *storagepb.AppendRowsRequest_ProtoData,
        cachedDesc **descriptorpb.DescriptorProto,
) (*descriptorpb.DescriptorProto, error) {
        const missingMsg = "proto_rows missing writer_schema.proto_descriptor"

        if cachedDesc == nil {
                return nil, errors.New(missingMsg)
        }
        if fromRequest := data.GetWriterSchema().GetProtoDescriptor(); fromRequest != nil {
                *cachedDesc = fromRequest
                return fromRequest, nil
        }
        if cached := *cachedDesc; cached != nil {
                return cached, nil
        }

        // Last resort: derive a descriptor from the stream's table schema.
        canAskEngine := engineClient != nil && engineClient.StorageWrite != nil && writeStream != ""
        if !canAskEngine {
                return nil, errors.New(missingMsg)
        }
        request := &enginepb.GetWriteStreamRequest{Name: writeStream}
        stream, err := engineClient.StorageWrite.GetWriteStream(ctx, request)
        if err != nil {
                return nil, fmt.Errorf("proto_rows missing writer_schema.proto_descriptor (GetWriteStream: %w)", err)
        }
        derived := descriptorFromEngineTableSchema(stream.GetSchema())
        if derived == nil {
                return nil, errors.New(missingMsg)
        }
        *cachedDesc = derived
        return derived, nil
}

// descriptorFromEngineTableSchema synthesises a proto message descriptor
// named "Row" from an engine table schema.
//
// Each non-nil schema field becomes one proto field whose number is its
// 1-based position in the schema (nil entries are skipped but still consume a
// number), with label and type chosen by engineModeToProtoLabel and
// engineTypeToProtoType. Returns nil when the schema is nil, has no fields,
// or has only nil fields.
func descriptorFromEngineTableSchema(schema *enginepb.TableSchema) *descriptorpb.DescriptorProto {
        schemaFields := schema.GetFields()
        if schema == nil || len(schemaFields) == 0 {
                return nil
        }

        var protoFields []*descriptorpb.FieldDescriptorProto
        for idx, schemaField := range schemaFields {
                if schemaField != nil {
                        protoFields = append(protoFields, &descriptorpb.FieldDescriptorProto{
                                Name:   new(schemaField.GetName()),
                                Number: new(int32(idx + 1)),
                                Label:  engineModeToProtoLabel(schemaField.GetMode()),
                                Type:   engineTypeToProtoType(schemaField.GetType()),
                        })
                }
        }
        if len(protoFields) == 0 {
                return nil
        }
        return &descriptorpb.DescriptorProto{Name: new("Row"), Field: protoFields}
}

// engineModeToProtoLabel maps an engine mode string (matched
// case-insensitively, surrounding whitespace ignored) to a proto field label:
// required and repeated modes map to their labels, everything else to
// LABEL_OPTIONAL.
func engineModeToProtoLabel(mode string) *descriptorpb.FieldDescriptorProto_Label {
        label := descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL
        switch strings.ToUpper(strings.TrimSpace(mode)) {
        case bqModeRequired:
                label = descriptorpb.FieldDescriptorProto_LABEL_REQUIRED
        case bqModeRepeated:
                label = descriptorpb.FieldDescriptorProto_LABEL_REPEATED
        }
        return label.Enum()
}

// engineTypeToProtoType maps an engine type name (matched case-insensitively,
// surrounding whitespace ignored) to the proto field type used in descriptors
// synthesised from a table schema.
//
// FLOAT is accepted alongside FLOAT64 and "DOUBLE" so that this mapping agrees
// with the other type mappings in this package, which treat FLOAT and FLOAT64
// as the same type; without it a FLOAT column would be described as a proto
// string field. DATE maps to int32 and TIMESTAMP to int64. DATETIME, TIME and
// every unrecognised type map to string.
func engineTypeToProtoType(t string) *descriptorpb.FieldDescriptorProto_Type {
        switch strings.ToUpper(strings.TrimSpace(t)) {
        case bqTypeBOOL:
                return descriptorpb.FieldDescriptorProto_TYPE_BOOL.Enum()
        case bqTypeINT64, bqTypeINTEGER:
                return descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum()
        case bqTypeFLOAT64, bqTypeFLOAT, "DOUBLE":
                return descriptorpb.FieldDescriptorProto_TYPE_DOUBLE.Enum()
        case bqTypeBYTES:
                return descriptorpb.FieldDescriptorProto_TYPE_BYTES.Enum()
        case bqTypeDATE:
                return descriptorpb.FieldDescriptorProto_TYPE_INT32.Enum()
        case bqTypeTIMESTAMP:
                return descriptorpb.FieldDescriptorProto_TYPE_INT64.Enum()
        case bqTypeDATETIME, bqTypeTIME:
                return descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum()
        default:
                return descriptorpb.FieldDescriptorProto_TYPE_STRING.Enum()
        }
}

// messageDescriptor turns a DescriptorProto into a usable
// protoreflect.MessageDescriptor by placing it, as the only message, in a
// synthetic file ("bqstorage_row.proto", package "bqstorage") built with no
// dependency resolver, and then looking the message up by its own name.
//
// A nil descriptor, a file that fails to build, or a message that cannot be
// found in the built file each produce an error.
func messageDescriptor(desc *descriptorpb.DescriptorProto) (protoreflect.MessageDescriptor, error) {
        if desc == nil {
                return nil, errors.New("nil descriptor")
        }
        wrapper := &descriptorpb.FileDescriptorProto{
                Name:        new("bqstorage_row.proto"),
                Package:     new("bqstorage"),
                MessageType: []*descriptorpb.DescriptorProto{desc},
        }
        file, err := protodesc.NewFile(wrapper, nil)
        if err != nil {
                return nil, err
        }
        if found := file.Messages().ByName(protoreflect.Name(desc.GetName())); found != nil {
                return found, nil
        }
        return nil, fmt.Errorf("descriptor %q not found in file", desc.GetName())
}

// dynamicMessageToDataRow converts a proto message into an engine data row
// with one cell per field of the message's descriptor, in declaration order.
// Each cell is produced by fieldDescriptorToCell; the first conversion error
// aborts the row.
func dynamicMessageToDataRow(msg protoreflect.Message) (*enginepb.DataRow, error) {
        descFields := msg.Descriptor().Fields()
        count := descFields.Len()
        cells := make([]*enginepb.Cell, count)
        for idx := 0; idx < count; idx++ {
                cell, err := fieldDescriptorToCell(msg, descFields.Get(idx))
                if err != nil {
                        return nil, err
                }
                cells[idx] = cell
        }
        return &enginepb.DataRow{Cells: cells}, nil
}

// fieldDescriptorToCell converts the value of field fd in msg to an engine
// cell.
//
//   - List fields become an array cell: empty when the field is not populated,
//     otherwise one element per list item via protoreflectValueToCell.
//   - Non-list fields that are not populated become a null cell.
//   - Populated message fields become a struct cell via messageToStructCell.
//   - Any other populated field is converted by protoreflectValueToCell.
func fieldDescriptorToCell(msg protoreflect.Message, fd protoreflect.FieldDescriptor) (*enginepb.Cell, error) {
        populated := msg.Has(fd)

        if !fd.IsList() {
                if !populated {
                        return &enginepb.Cell{Value: &enginepb.Cell_NullValue{NullValue: true}}, nil
                }
                if fd.Kind() == protoreflect.MessageKind {
                        return messageToStructCell(msg.Get(fd).Message())
                }
                return protoreflectValueToCell(msg.Get(fd), fd)
        }

        array := &enginepb.Array{}
        if populated {
                items := msg.Get(fd).List()
                array.Elements = make([]*enginepb.Cell, items.Len())
                for idx := 0; idx < items.Len(); idx++ {
                        element, err := protoreflectValueToCell(items.Get(idx), fd)
                        if err != nil {
                                return nil, err
                        }
                        array.Elements[idx] = element
                }
        }
        return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: array}}, nil
}

// messageToStructCell converts a nested proto message into a struct cell
// holding one cell per field of the message's descriptor, in declaration
// order. Fields are converted with fieldDescriptorToCell, so nested messages
// and lists are handled recursively; the first error aborts the conversion.
func messageToStructCell(msg protoreflect.Message) (*enginepb.Cell, error) {
        descFields := msg.Descriptor().Fields()
        members := make([]*enginepb.Cell, descFields.Len())
        for idx := range members {
                member, err := fieldDescriptorToCell(msg, descFields.Get(idx))
                if err != nil {
                        return nil, err
                }
                members[idx] = member
        }
        structValue := &enginepb.Struct{Fields: members}
        return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: structValue}}, nil
}

// protoreflectValueToCell converts a single proto value of field fd's kind
// into an engine cell.
//
// Messages become struct cells. Scalars become string cells: booleans via
// strconv.FormatBool, signed integers in base 10, unsigned integers in base
// 10 after clamping to the int64 range (uint64ToSignedInt64), floats and
// doubles in shortest 'g' form at 32- and 64-bit precision respectively,
// strings as is, and bytes reinterpreted directly as a string. Any other kind
// is rejected with an error.
func protoreflectValueToCell(v protoreflect.Value, fd protoreflect.FieldDescriptor) (*enginepb.Cell, error) {
        // text wraps s in a string-valued cell.
        text := func(s string) (*enginepb.Cell, error) {
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: s}}, nil
        }

        switch kind := fd.Kind(); kind {
        case protoreflect.MessageKind:
                return messageToStructCell(v.Message())
        case protoreflect.BoolKind:
                return text(strconv.FormatBool(v.Bool()))
        case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind,
                protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
                return int64Cell(v.Int()), nil
        case protoreflect.Uint32Kind, protoreflect.Fixed32Kind,
                protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
                return int64Cell(uint64ToSignedInt64(v.Uint())), nil
        case protoreflect.FloatKind:
                return text(strconv.FormatFloat(v.Float(), 'g', -1, 32))
        case protoreflect.DoubleKind:
                return text(strconv.FormatFloat(v.Float(), 'g', -1, 64))
        case protoreflect.StringKind:
                return text(v.String())
        case protoreflect.BytesKind:
                return text(string(v.Bytes()))
        default:
                return nil, fmt.Errorf("unsupported proto field kind %v", kind)
        }
}

// int64Cell returns a string-valued cell holding n formatted in base 10.
func int64Cell(n int64) *enginepb.Cell {
        decimal := strconv.FormatInt(n, 10)
        return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: decimal}}
}

package bqstorage

import (
        "context"
        "errors"
        "io"
        "strconv"
        "strings"
        "sync"

        "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

// readSessionState is what the server remembers about a read session after
// creating it: the engine table schema and the data format that was selected
// for the session.
type readSessionState struct {
        schema     *enginepb.TableSchema
        dataFormat storagepb.DataFormat
}

// ReadServer implements the public BigQueryRead gRPC service by adapting
// requests to the engine's internal StorageRead contract. Row pages are
// served in the data format recorded for the session (Arrow when none is
// recorded).
type ReadServer struct {
        storagepb.UnimplementedBigQueryReadServer
        engine *engine.Client

        // mu guards sessions.
        mu sync.RWMutex
        // sessions maps a read session name to its remembered state; it is
        // created lazily on first use.
        sessions map[string]*readSessionState
}

// requireEngine reports whether the server can reach the engine's StorageRead
// client. It returns nil when the receiver, its engine client and that
// client's StorageRead are all non-nil, and a gRPC Unavailable status error
// otherwise.
func (s *ReadServer) requireEngine() error {
        if s != nil && s.engine != nil && s.engine.StorageRead != nil {
                return nil
        }
        return status.Error(codes.Unavailable, "BigQuery Storage Read API requires a running engine subprocess")
}

// rememberSession records the schema and data format of the read session
// called name, replacing any earlier entry for that name. Calls with an empty
// name or a nil schema are ignored. The session map is created on first use,
// under the write lock.
func (s *ReadServer) rememberSession(
        name string,
        schema *enginepb.TableSchema,
        dataFormat storagepb.DataFormat,
) {
        if name == "" || schema == nil {
                return
        }
        entry := &readSessionState{schema: schema, dataFormat: dataFormat}

        s.mu.Lock()
        if s.sessions == nil {
                s.sessions = make(map[string]*readSessionState)
        }
        s.sessions[name] = entry
        s.mu.Unlock()
}

// sessionState returns the state remembered for the session that owns
// streamName, or nil when nothing was recorded for it. The session name is
// the part of streamName before its last "/streams/" segment; a name without
// that segment is used whole.
func (s *ReadServer) sessionState(streamName string) *readSessionState {
        key := streamName
        if cut := strings.LastIndex(streamName, "/streams/"); cut >= 0 {
                key = streamName[:cut]
        }

        s.mu.RLock()
        state := s.sessions[key]
        s.mu.RUnlock()
        return state
}

// CreateReadSession forwards the request to the engine's StorageRead
// service, remembers the resulting session's schema and data format, and
// returns the session converted to the public storagepb message.
//
// When the request leaves the data format unspecified (or carries no
// ReadSession at all) the session is served as Arrow.
func (s *ReadServer) CreateReadSession(
        ctx context.Context,
        req *storagepb.CreateReadSessionRequest,
) (*storagepb.ReadSession, error) {
        if err := s.requireEngine(); err != nil {
                return nil, err
        }

        // Generated getters are nil-safe, so a missing ReadSession reads as
        // DATA_FORMAT_UNSPECIFIED here.
        dataFormat := req.GetReadSession().GetDataFormat()
        if dataFormat == storagepb.DataFormat_DATA_FORMAT_UNSPECIFIED {
                dataFormat = storagepb.DataFormat_ARROW
        }

        engineSession, err := s.engine.StorageRead.CreateReadSession(ctx, engineCreateReadSessionRequest(req))
        if err != nil {
                return nil, err
        }
        s.rememberSession(engineSession.GetName(), engineSession.GetSchema(), dataFormat)
        return publicReadSessionFromEngine(engineSession, dataFormat)
}

// ReadRows opens the matching engine read stream and pumps its pages to the
// client, encoded in the data format remembered for the owning session.
// Streams whose session is unknown, or whose remembered format is
// unspecified, are served as Arrow.
func (s *ReadServer) ReadRows(
        req *storagepb.ReadRowsRequest,
        stream storagepb.BigQueryRead_ReadRowsServer,
) error {
        if err := s.requireEngine(); err != nil {
                return err
        }

        engineReq := &enginepb.ReadRowsRequest{
                ReadStream: req.GetReadStream(),
                Offset:     req.GetOffset(),
        }
        engineStream, err := s.engine.StorageRead.ReadRows(stream.Context(), engineReq)
        if err != nil {
                return err
        }

        state := s.sessionState(req.GetReadStream())
        var dataFormat storagepb.DataFormat
        if state != nil {
                dataFormat = state.dataFormat
        }
        if dataFormat == storagepb.DataFormat_DATA_FORMAT_UNSPECIFIED {
                dataFormat = storagepb.DataFormat_ARROW
        }
        return s.pumpEngineReadRows(engineStream, stream, state, dataFormat)
}

// pumpEngineReadRows copies pages from engineStream to the client stream
// until the engine signals io.EOF, encoding each non-empty page in
// dataFormat. Empty pages are skipped.
//
// The schema comes from state when one was remembered; otherwise it is
// inferred from the first row of each page. The serialized schema is
// attached only to the first response that is sent.
func (s *ReadServer) pumpEngineReadRows(
        engineStream enginepb.StorageRead_ReadRowsClient,
        stream storagepb.BigQueryRead_ReadRowsServer,
        state *readSessionState,
        dataFormat storagepb.DataFormat,
) error {
        var sessionSchema *enginepb.TableSchema
        if state != nil {
                sessionSchema = state.schema
        }

        schemaSent := false
        for {
                page, recvErr := engineStream.Recv()
                switch {
                case errors.Is(recvErr, io.EOF):
                        return nil
                case recvErr != nil:
                        return recvErr
                }

                rows := page.GetRows()
                if len(rows) == 0 {
                        continue
                }

                schema := sessionSchema
                if schema == nil {
                        schema = inferSchemaFromRow(rows[0])
                }

                resp, encodeErr := readRowsResponseForFormat(dataFormat, schema, rows)
                if encodeErr != nil {
                        return status.Errorf(codes.Internal, "encode ReadRows batch: %v", encodeErr)
                }
                if !schemaSent {
                        if schemaErr := attachReadRowsSchema(resp, dataFormat, schema); schemaErr != nil {
                                return schemaErr
                        }
                        schemaSent = true
                }
                if sendErr := stream.Send(resp); sendErr != nil {
                        return sendErr
                }
        }
}

// attachReadRowsSchema serializes schema and stores it on resp: as an Avro
// schema when dataFormat is AVRO, and as an Arrow schema for every other
// format. Serialization failures are returned as Internal status errors.
func attachReadRowsSchema(
        resp *storagepb.ReadRowsResponse,
        dataFormat storagepb.DataFormat,
        schema *enginepb.TableSchema,
) error {
        if dataFormat == storagepb.DataFormat_AVRO {
                avroSchema, err := serializeAvroSchema(schema)
                if err != nil {
                        return status.Errorf(codes.Internal, "encode Avro schema: %v", err)
                }
                resp.Schema = &storagepb.ReadRowsResponse_AvroSchema{AvroSchema: avroSchema}
                return nil
        }

        arrowSchema, err := serializeArrowSchema(schema)
        if err != nil {
                return status.Errorf(codes.Internal, "encode Arrow schema: %v", err)
        }
        resp.Schema = &storagepb.ReadRowsResponse_ArrowSchema{ArrowSchema: arrowSchema}
        return nil
}

// readRowsResponseForFormat encodes rows into a ReadRowsResponse whose
// RowCount is len(rows). AVRO produces an Avro rows payload; every other
// format produces an Arrow record batch. Encoder errors are returned as-is.
func readRowsResponseForFormat(
        dataFormat storagepb.DataFormat,
        schema *enginepb.TableSchema,
        rows []*enginepb.DataRow,
) (*storagepb.ReadRowsResponse, error) {
        resp := &storagepb.ReadRowsResponse{RowCount: int64(len(rows))}

        if dataFormat == storagepb.DataFormat_AVRO {
                avroBatch, err := rowsToAvroBatch(schema, rows)
                if err != nil {
                        return nil, err
                }
                resp.Rows = &storagepb.ReadRowsResponse_AvroRows{AvroRows: avroBatch}
                return resp, nil
        }

        arrowBatch, err := rowsToArrowBatch(schema, rows)
        if err != nil {
                return nil, err
        }
        resp.Rows = &storagepb.ReadRowsResponse_ArrowRecordBatch{ArrowRecordBatch: arrowBatch}
        return resp, nil
}

// SplitReadStream forwards the split request to the engine and copies the
// names of the primary and remainder streams, when present, into the public
// response.
func (s *ReadServer) SplitReadStream(
        ctx context.Context,
        req *storagepb.SplitReadStreamRequest,
) (*storagepb.SplitReadStreamResponse, error) {
        if err := s.requireEngine(); err != nil {
                return nil, err
        }

        engineReq := &enginepb.SplitReadStreamRequest{
                Name:     req.GetName(),
                Fraction: req.GetFraction(),
        }
        engineResp, err := s.engine.StorageRead.SplitReadStream(ctx, engineReq)
        if err != nil {
                return nil, err
        }

        split := new(storagepb.SplitReadStreamResponse)
        if p := engineResp.GetPrimaryStream(); p != nil {
                split.PrimaryStream = &storagepb.ReadStream{Name: p.GetName()}
        }
        if r := engineResp.GetRemainderStream(); r != nil {
                split.RemainderStream = &storagepb.ReadStream{Name: r.GetName()}
        }
        return split, nil
}

// inferSchemaFromRow builds a fallback schema for a row whose real schema is
// unknown: one nullable STRING field per cell, named by columnName. A nil
// row or a row without cells yields an empty schema.
func inferSchemaFromRow(row *enginepb.DataRow) *enginepb.TableSchema {
        if row == nil || len(row.GetCells()) == 0 {
                return &enginepb.TableSchema{}
        }

        fields := make([]*enginepb.FieldSchema, len(row.GetCells()))
        for idx := range fields {
                fields[idx] = &enginepb.FieldSchema{
                        Name: columnName(idx),
                        Type: bqTypeSTRING,
                        Mode: bqModeNullable,
                }
        }
        return &enginepb.TableSchema{Fields: fields}
}

// columnName returns the synthetic name of the i-th column: "col_" followed
// by the decimal index.
func columnName(i int) string {
        const prefix = "col_"
        return prefix + strconv.FormatInt(int64(i), 10)
}

package bqstorage

import (
        "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "google.golang.org/grpc"
)

// RegisterGRPC registers the public BigQuery Storage Read and Write services
// on srv, both backed by eng. The gateway calls it during startup so client
// libraries dialing BIGQUERY_STORAGE_GRPC_ENDPOINT reach
// google.cloud.bigquery.storage.v1 rather than the engine-internal
// bigquery_emulator.v1.* service names. A nil srv makes the call a no-op.
func RegisterGRPC(srv grpc.ServiceRegistrar, eng *engine.Client) {
        if srv == nil {
                return
        }
        storagepb.RegisterBigQueryReadServer(srv, &ReadServer{engine: eng})
        storagepb.RegisterBigQueryWriteServer(srv, &WriteServer{engine: eng})
}

// Package bqstorage is the public BigQuery Storage gRPC shim. It registers
// google.cloud.bigquery.storage.v1.BigQueryRead / BigQueryWrite on the
// gateway listener and adapts RPCs to the engine's internal
// bigquery_emulator.v1.StorageRead / StorageWrite contracts.
package bqstorage

import (
        "context"
        "errors"
        "io"
        "strings"
        "time"

        "cloud.google.com/go/bigquery/storage/apiv1/storagepb"
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
        "google.golang.org/protobuf/types/descriptorpb"
        "google.golang.org/protobuf/types/known/timestamppb"
        "google.golang.org/protobuf/types/known/wrapperspb"
)

// WriteServer implements the public BigQueryWrite gRPC service. Its handlers
// forward each RPC to the engine client's StorageWrite stub and translate
// between the public storagepb messages and the engine's enginepb messages.
type WriteServer struct {
        storagepb.UnimplementedBigQueryWriteServer
        // engine is the engine client; requireEngine rejects RPCs while it or
        // its StorageWrite stub is nil.
        engine *engine.Client
}

// requireEngine returns an Unavailable status error when the server, its
// engine client, or the client's StorageWrite stub is nil, and nil otherwise.
func (s *WriteServer) requireEngine() error {
        ready := s != nil && s.engine != nil && s.engine.StorageWrite != nil
        if ready {
                return nil
        }
        return status.Error(codes.Unavailable, "BigQuery Storage Write API requires a running engine subprocess")
}

// CreateWriteStream creates a write stream under the request's parent table.
//
// COMMITTED streams, and requests that leave the type unspecified (including
// a missing WriteStream), are answered with the table's reserved _default
// stream via defaultWriteStream. Every other stream type is forwarded to the
// engine's CreateWriteStream and the result is converted to the public
// message. Engine errors are returned unchanged.
func (s *WriteServer) CreateWriteStream(
        ctx context.Context,
        req *storagepb.CreateWriteStreamRequest,
) (*storagepb.WriteStream, error) {
        if err := s.requireEngine(); err != nil {
                return nil, err
        }

        // Generated getters are nil-safe: a missing WriteStream reads as
        // TYPE_UNSPECIFIED, which is treated the same as COMMITTED.
        switch req.GetWriteStream().GetType() {
        case storagepb.WriteStream_TYPE_UNSPECIFIED, storagepb.WriteStream_COMMITTED:
                return s.defaultWriteStream(ctx, req.GetParent())
        }

        stream, err := s.engine.StorageWrite.CreateWriteStream(ctx, &enginepb.CreateWriteStreamRequest{
                Parent:      req.GetParent(),
                WriteStream: engineWriteStreamFromPublic(req.GetWriteStream()),
        })
        if err != nil {
                return nil, err
        }
        return publicWriteStreamFromEngine(stream), nil
}

// defaultWriteStreamName returns the name of the reserved _default write
// stream for parent: parent with any trailing slashes removed, followed by
// "/streams/_default".
func defaultWriteStreamName(parent string) string {
        const suffix = "/streams/_default"
        trimmed := strings.TrimRight(parent, "/")
        return trimmed + suffix
}

// defaultWriteStream returns the public description of parent's reserved
// _default stream, always named by defaultWriteStreamName and typed
// COMMITTED.
//
// It first asks the engine for the stream by name. If that lookup fails for
// any reason, it creates a COMMITTED stream under parent purely to obtain
// the stream metadata and relabels the result as _default.
func (s *WriteServer) defaultWriteStream(
        ctx context.Context,
        parent string,
) (*storagepb.WriteStream, error) {
        name := defaultWriteStreamName(parent)

        // relabel presents a converted engine stream as the _default stream.
        relabel := func(ws *storagepb.WriteStream) *storagepb.WriteStream {
                ws.Name = name
                ws.Type = storagepb.WriteStream_COMMITTED
                return ws
        }

        lookup := &enginepb.GetWriteStreamRequest{Name: name}
        if existing, lookupErr := s.engine.StorageWrite.GetWriteStream(ctx, lookup); lookupErr == nil {
                return relabel(publicWriteStreamFromEngine(existing)), nil
        }

        // Mint schema metadata via CreateWriteStream; the engine registers the
        // reserved _default stream lazily on the first AppendRows.
        probe, err := s.engine.StorageWrite.CreateWriteStream(ctx, &enginepb.CreateWriteStreamRequest{
                Parent:      parent,
                WriteStream: &enginepb.WriteStream{Type: enginepb.WriteStream_COMMITTED},
        })
        if err != nil {
                return nil, err
        }
        return relabel(publicWriteStreamFromEngine(probe)), nil
}

// AppendRows bridges the public bidirectional AppendRows stream to the
// engine's AppendRows stream. Requests are handled strictly one at a time:
// each client request is converted, sent to the engine, and the engine's
// reply is converted and sent back before the next request is read.
//
// The handler returns nil when the client half-closes the stream (io.EOF).
// Receive, send, and engine errors are returned unchanged. A request that
// cannot be converted ends the stream with a "decode AppendRows" status
// error; its code is InvalidArgument unless the conversion error already
// carries a gRPC status, in which case that code (for example Unimplemented
// for Arrow rows) is kept.
func (s *WriteServer) AppendRows(stream storagepb.BigQueryWrite_AppendRowsServer) error {
        if err := s.requireEngine(); err != nil {
                return err
        }
        ctx := stream.Context()
        engineStream, err := s.engine.StorageWrite.AppendRows(ctx)
        if err != nil {
                return err
        }

        // The descriptor cache is shared by every request on this stream and is
        // handed to the converter by pointer so it can read and update it.
        var cachedProtoDesc *descriptorpb.DescriptorProto
        for {
                req, recvErr := stream.Recv()
                if errors.Is(recvErr, io.EOF) {
                        return nil
                }
                if recvErr != nil {
                        return recvErr
                }
                engineReq, convErr := s.publicAppendRequestToEngine(ctx, req, &cachedProtoDesc)
                if convErr != nil {
                        // Keep the code chosen by the converter instead of
                        // flattening every failure to InvalidArgument.
                        code := codes.InvalidArgument
                        if st, ok := status.FromError(convErr); ok && st.Code() != codes.OK {
                                code = st.Code()
                        }
                        return status.Errorf(code, "decode AppendRows: %v", convErr)
                }
                if err := engineStream.Send(engineReq); err != nil {
                        return err
                }
                engineResp, err := engineStream.Recv()
                if err != nil {
                        return err
                }
                if err := stream.Send(publicAppendResponseFromEngine(req.GetWriteStream(), engineResp)); err != nil {
                        return err
                }
        }
}

// GetWriteStream returns the engine's view of the named stream converted to
// the public message. When the engine lookup fails and the name ends in
// "/streams/_default", the stream is synthesized by defaultWriteStream for
// the table named by the prefix; otherwise the engine's error is returned.
func (s *WriteServer) GetWriteStream(
        ctx context.Context,
        req *storagepb.GetWriteStreamRequest,
) (*storagepb.WriteStream, error) {
        if err := s.requireEngine(); err != nil {
                return nil, err
        }

        name := req.GetName()
        found, err := s.engine.StorageWrite.GetWriteStream(ctx, &enginepb.GetWriteStreamRequest{Name: name})
        if err == nil {
                return publicWriteStreamFromEngine(found), nil
        }

        parent, isDefault := strings.CutSuffix(name, "/streams/_default")
        if !isDefault {
                return nil, err
        }
        return s.defaultWriteStream(ctx, parent)
}

// FinalizeWriteStream asks the engine to finalize the named stream and
// returns the row count it reports.
func (s *WriteServer) FinalizeWriteStream(
        ctx context.Context,
        req *storagepb.FinalizeWriteStreamRequest,
) (*storagepb.FinalizeWriteStreamResponse, error) {
        if err := s.requireEngine(); err != nil {
                return nil, err
        }

        engineReq := &enginepb.FinalizeWriteStreamRequest{Name: req.GetName()}
        engineResp, err := s.engine.StorageWrite.FinalizeWriteStream(ctx, engineReq)
        if err != nil {
                return nil, err
        }

        out := new(storagepb.FinalizeWriteStreamResponse)
        out.RowCount = engineResp.GetRowCount()
        return out, nil
}

// BatchCommitWriteStreams forwards the commit to the engine, passing it a
// copy of the requested stream names. The engine's commit time is an
// RFC 3339 string; it is copied into the response only when it is non-empty
// and parses, and is otherwise left unset.
func (s *WriteServer) BatchCommitWriteStreams(
        ctx context.Context,
        req *storagepb.BatchCommitWriteStreamsRequest,
) (*storagepb.BatchCommitWriteStreamsResponse, error) {
        if err := s.requireEngine(); err != nil {
                return nil, err
        }

        engineReq := &enginepb.BatchCommitWriteStreamsRequest{
                Parent:       req.GetParent(),
                WriteStreams: append([]string(nil), req.GetWriteStreams()...),
        }
        engineResp, err := s.engine.StorageWrite.BatchCommitWriteStreams(ctx, engineReq)
        if err != nil {
                return nil, err
        }

        out := &storagepb.BatchCommitWriteStreamsResponse{}
        raw := engineResp.GetCommitTime()
        if raw == "" {
                return out, nil
        }
        // Best effort: an unparseable commit time is dropped, not reported.
        if committed, parseErr := time.Parse(time.RFC3339, raw); parseErr == nil {
                out.CommitTime = timestamppb.New(committed)
        }
        return out, nil
}

// FlushRows forwards the flush to the engine and returns the offset it
// reports. A request without an offset wrapper is sent with offset 0.
func (s *WriteServer) FlushRows(
        ctx context.Context,
        req *storagepb.FlushRowsRequest,
) (*storagepb.FlushRowsResponse, error) {
        if err := s.requireEngine(); err != nil {
                return nil, err
        }

        engineReq := &enginepb.FlushRowsRequest{
                WriteStream: req.GetWriteStream(),
                // GetValue on a nil wrapper yields 0.
                Offset: req.GetOffset().GetValue(),
        }
        engineResp, err := s.engine.StorageWrite.FlushRows(ctx, engineReq)
        if err != nil {
                return nil, err
        }
        return &storagepb.FlushRowsResponse{Offset: engineResp.GetOffset()}, nil
}

// publicAppendRequestToEngine converts one public AppendRowsRequest into the
// engine's request message.
//
// The write stream, trace id, and offset (0 when unset) are always copied.
// Proto rows are converted by protoDataToEngineRows, which receives
// cachedProtoDesc. Arrow rows are rejected with Unimplemented. A request
// without a rows payload is passed through with no rows. A nil request is
// rejected with InvalidArgument.
func (s *WriteServer) publicAppendRequestToEngine(
        ctx context.Context,
        req *storagepb.AppendRowsRequest,
        cachedProtoDesc **descriptorpb.DescriptorProto,
) (*enginepb.AppendRowsRequest, error) {
        if req == nil {
                return nil, status.Error(codes.InvalidArgument, "nil AppendRowsRequest")
        }

        engineReq := &enginepb.AppendRowsRequest{
                WriteStream: req.GetWriteStream(),
                TraceId:     req.GetTraceId(),
                // GetValue on a nil wrapper yields 0.
                Offset: req.GetOffset().GetValue(),
        }

        switch payload := req.GetRows().(type) {
        case *storagepb.AppendRowsRequest_ArrowRows:
                return nil, status.Error(
                        codes.Unimplemented,
                        "Arrow AppendRows is not implemented by the emulator storage shim",
                )
        case *storagepb.AppendRowsRequest_ProtoRows:
                converted, err := protoDataToEngineRows(
                        ctx,
                        s.engine,
                        req.GetWriteStream(),
                        payload.ProtoRows,
                        cachedProtoDesc,
                )
                if err != nil {
                        return nil, err
                }
                engineReq.ProtoRows = &enginepb.AppendRowsRequest_ProtoData{Rows: converted}
        }
        return engineReq, nil
}

// publicAppendResponseFromEngine converts an engine AppendRows reply into
// the public response for writeStream.
//
// A nil reply yields a response carrying only the stream name. A reply with
// an error message becomes an InvalidArgument error response. Any other
// reply becomes an append result, with the offset set only when the engine
// supplied an append result.
func publicAppendResponseFromEngine(
        writeStream string,
        in *enginepb.AppendRowsResponse,
) *storagepb.AppendRowsResponse {
        resp := &storagepb.AppendRowsResponse{WriteStream: writeStream}
        if in == nil {
                return resp
        }

        if msg := in.GetErrorMessage(); msg != "" {
                failure := status.New(codes.InvalidArgument, msg)
                resp.Response = &storagepb.AppendRowsResponse_Error{Error: failure.Proto()}
                return resp
        }

        var offset *wrapperspb.Int64Value
        if engineResult := in.GetAppendResult(); engineResult != nil {
                offset = wrapperspb.Int64(engineResult.GetOffset())
        }
        resp.Response = &storagepb.AppendRowsResponse_AppendResult_{
                AppendResult: &storagepb.AppendRowsResponse_AppendResult{Offset: offset},
        }
        return resp
}

package bqv2grpc

import (
        "strconv"
        "strings"
        "time"

        "cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/routines"
        "google.golang.org/protobuf/types/known/wrapperspb"
)

// Resource "kind" strings stamped on the dataset, table, and job messages
// built by the converters in this file.
const (
        datasetKind = "bigquery#dataset"
        tableKind   = "bigquery#table"
        jobKind     = "bigquery#job"
)

// nowMillis returns the current wall-clock time as milliseconds since the
// Unix epoch.
func nowMillis() int64 {
        now := time.Now()
        return now.UnixMilli()
}

// datasetFromREST converts a REST-shaped dataset into the gRPC Dataset
// message for projectID/datasetID.
//
// Defaults applied to the result: nil labels become an empty map, an empty
// location becomes "US", and a creation or last-modified time that
// parseMillis reports as 0 is replaced by the current time. FriendlyName and
// Description are set only when non-empty. The dataset's Access list is not
// carried over to the proto message.
func datasetFromREST(projectID, datasetID string, ds bqtypes.Dataset) *bigquerypb.Dataset {
        labels := ds.Labels
        if labels == nil {
                labels = bqtypes.ResourceLabels{}
        }
        location := ds.Location
        if location == "" {
                location = "US"
        }
        created := parseMillis(ds.CreationTime)
        if created == 0 {
                created = nowMillis()
        }
        modified := parseMillis(ds.LastModifiedTime)
        if modified == 0 {
                modified = nowMillis()
        }

        out := &bigquerypb.Dataset{
                Kind:             datasetKind,
                Id:               projectID + ":" + datasetID,
                DatasetReference: &bigquerypb.DatasetReference{ProjectId: projectID, DatasetId: datasetID},
                Location:         location,
                Labels:           map[string]string(labels),
                CreationTime:     created,
                LastModifiedTime: modified,
                Etag:             ds.Etag,
        }
        if ds.FriendlyName != "" {
                out.FriendlyName = wrapperspb.String(ds.FriendlyName)
        }
        if ds.Description != "" {
                out.Description = wrapperspb.String(ds.Description)
        }
        return out
}

// datasetToREST converts a gRPC Dataset message into the REST-shaped
// dataset. A nil message yields the zero Dataset. Access is always an empty
// list, and the dataset reference is filled in only when the message has one.
func datasetToREST(ds *bigquerypb.Dataset) bqtypes.Dataset {
        if ds == nil {
                return bqtypes.Dataset{}
        }

        var reference bqtypes.DatasetReference
        if ref := ds.GetDatasetReference(); ref != nil {
                reference = bqtypes.DatasetReference{
                        ProjectID: ref.GetProjectId(),
                        DatasetID: ref.GetDatasetId(),
                }
        }

        return bqtypes.Dataset{
                Kind:             ds.GetKind(),
                ID:               ds.GetId(),
                DatasetReference: reference,
                FriendlyName:     ds.GetFriendlyName().GetValue(),
                Description:      ds.GetDescription().GetValue(),
                Location:         ds.GetLocation(),
                Etag:             ds.GetEtag(),
                CreationTime:     formatMillis(ds.GetCreationTime()),
                LastModifiedTime: formatMillis(ds.GetLastModifiedTime()),
                Labels:           bqtypes.ResourceLabels(ds.GetLabels()),
                Access:           []map[string]any{},
        }
}

// listDatasetFromRef builds the list-format entry for a dataset from its
// identifiers and labels. Nil labels are replaced by an empty map.
func listDatasetFromRef(projectID, datasetID string, labels map[string]string) *bigquerypb.ListFormatDataset {
        if labels == nil {
                labels = make(map[string]string)
        }
        entry := &bigquerypb.ListFormatDataset{
                Kind:   datasetKind,
                Id:     projectID + ":" + datasetID,
                Labels: labels,
        }
        entry.DatasetReference = &bigquerypb.DatasetReference{
                ProjectId: projectID,
                DatasetId: datasetID,
        }
        return entry
}

// tableFromREST converts a REST-shaped table into the gRPC Table message for
// projectID/datasetID/tableID.
//
// Defaults applied to the result: nil labels become an empty map, an empty
// type becomes tableTypeTable, an empty location becomes "US", and a
// creation or last-modified time that parseMillis reports as 0 is replaced
// by the current time. NumRows and NumBytes are set only when they parse to
// a positive number; FriendlyName and Description only when non-empty.
func tableFromREST(projectID, datasetID, tableID string, t bqtypes.Table) *bigquerypb.Table {
        labels := t.Labels
        if labels == nil {
                labels = bqtypes.ResourceLabels{}
        }
        tableType := t.Type
        if tableType == "" {
                tableType = tableTypeTable
        }
        location := t.Location
        if location == "" {
                location = "US"
        }
        created := parseMillis(t.CreationTime)
        if created == 0 {
                created = nowMillis()
        }
        modified := parseMillis(t.LastModifiedTime)
        if modified == 0 {
                modified = nowMillis()
        }

        reference := &bigquerypb.TableReference{
                ProjectId: projectID,
                DatasetId: datasetID,
                TableId:   tableID,
        }
        out := &bigquerypb.Table{
                Kind:             tableKind,
                Id:               projectID + ":" + datasetID + "." + tableID,
                TableReference:   reference,
                Type:             tableType,
                Labels:           map[string]string(labels),
                CreationTime:     created,
                LastModifiedTime: uint64FromNonNegativeInt64(modified),
                Etag:             t.Etag,
                Location:         location,
                Schema:           schemaToProto(t.Schema),
        }

        if rows := parseInt64(t.NumRows); rows > 0 {
                out.NumRows = wrapperspb.UInt64(uint64(rows))
        }
        if size := parseInt64(t.NumBytes); size > 0 {
                out.NumBytes = wrapperspb.Int64(size)
        }
        if name := t.FriendlyName; name != "" {
                out.FriendlyName = wrapperspb.String(name)
        }
        if desc := t.Description; desc != "" {
                out.Description = wrapperspb.String(desc)
        }
        return out
}

// tableToREST converts a gRPC Table message into the REST-shaped table. A
// nil message yields the zero Table, and the table reference is filled in
// only when the message has one.
func tableToREST(t *bigquerypb.Table) bqtypes.Table {
        if t == nil {
                return bqtypes.Table{}
        }

        var reference bqtypes.TableReference
        if ref := t.GetTableReference(); ref != nil {
                reference = bqtypes.TableReference{
                        ProjectID: ref.GetProjectId(),
                        DatasetID: ref.GetDatasetId(),
                        TableID:   ref.GetTableId(),
                }
        }

        return bqtypes.Table{
                Kind:             t.GetKind(),
                ID:               t.GetId(),
                TableReference:   reference,
                FriendlyName:     t.GetFriendlyName().GetValue(),
                Description:      t.GetDescription().GetValue(),
                Type:             t.GetType(),
                Etag:             t.GetEtag(),
                CreationTime:     formatMillis(t.GetCreationTime()),
                LastModifiedTime: formatMillis(int64FromUint64(t.GetLastModifiedTime())),
                NumRows:          formatUInt64(t.GetNumRows().GetValue()),
                NumBytes:         formatInt64(t.GetNumBytes().GetValue()),
                Location:         t.GetLocation(),
                Labels:           bqtypes.ResourceLabels(t.GetLabels()),
                Schema:           schemaFromProto(t.GetSchema()),
        }
}

// listTableFromRef builds the list-format entry for a table from its
// identifiers, type, and labels. Nil labels are replaced by an empty map and
// an empty type by "TABLE".
func listTableFromRef(
        projectID, datasetID, tableID, tableType string,
        labels map[string]string,
) *bigquerypb.ListFormatTable {
        if tableType == "" {
                tableType = "TABLE"
        }
        if labels == nil {
                labels = make(map[string]string)
        }
        entry := &bigquerypb.ListFormatTable{
                Kind:   tableKind,
                Id:     projectID + ":" + datasetID + "." + tableID,
                Type:   tableType,
                Labels: labels,
        }
        entry.TableReference = &bigquerypb.TableReference{
                ProjectId: projectID,
                DatasetId: datasetID,
                TableId:   tableID,
        }
        return entry
}

// schemaToProto converts a REST table schema into the gRPC TableSchema,
// converting each field with fieldToProto. A nil schema yields nil.
func schemaToProto(s *bqtypes.TableSchema) *bigquerypb.TableSchema {
        if s == nil {
                return nil
        }
        converted := make([]*bigquerypb.TableFieldSchema, len(s.Fields))
        for idx := range s.Fields {
                converted[idx] = fieldToProto(s.Fields[idx])
        }
        return &bigquerypb.TableSchema{Fields: converted}
}

// fieldToProto converts one REST field schema, and recursively its nested
// fields, into the gRPC TableFieldSchema. Description is set only when
// non-empty.
func fieldToProto(f bqtypes.TableFieldSchema) *bigquerypb.TableFieldSchema {
        field := &bigquerypb.TableFieldSchema{Name: f.Name, Type: f.Type, Mode: f.Mode}
        if desc := f.Description; desc != "" {
                field.Description = wrapperspb.String(desc)
        }
        if nested := len(f.Fields); nested > 0 {
                field.Fields = make([]*bigquerypb.TableFieldSchema, nested)
                for idx := range f.Fields {
                        field.Fields[idx] = fieldToProto(f.Fields[idx])
                }
        }
        return field
}

// schemaFromProto converts a gRPC TableSchema into the REST table schema,
// converting each field with fieldFromProto. A nil schema, or one without
// fields, yields nil.
func schemaFromProto(s *bigquerypb.TableSchema) *bqtypes.TableSchema {
        // GetFields is nil-safe, so this also covers a nil schema.
        protoFields := s.GetFields()
        if len(protoFields) == 0 {
                return nil
        }
        converted := make([]bqtypes.TableFieldSchema, len(protoFields))
        for idx, field := range protoFields {
                converted[idx] = fieldFromProto(field)
        }
        return &bqtypes.TableSchema{Fields: converted}
}

// fieldFromProto converts one gRPC field schema, and recursively its nested
// fields, into the REST field schema. The type is passed through
// normalizeRESTFieldType, and a type equal to "STRUCT" in any letter case is
// reported as "RECORD".
func fieldFromProto(f *bigquerypb.TableFieldSchema) bqtypes.TableFieldSchema {
        restType := normalizeRESTFieldType(f.GetType())
        if strings.EqualFold(restType, "STRUCT") {
                restType = "RECORD"
        }

        field := bqtypes.TableFieldSchema{
                Name:        f.GetName(),
                Type:        restType,
                Mode:        f.GetMode(),
                Description: f.GetDescription().GetValue(),
        }
        nested := f.GetFields()
        for idx := range nested {
                field.Fields = append(field.Fields, fieldFromProto(nested[idx]))
        }
        return field
}

// normalizeRESTFieldType maps the type names INT64, FLOAT64, and BOOL to
// INTEGER, FLOAT, and BOOLEAN. Matching ignores letter case and surrounding
// whitespace; any other input is returned exactly as given.
func normalizeRESTFieldType(t string) string {
        legacy := t
        switch strings.ToUpper(strings.TrimSpace(t)) {
        case "BOOL":
                legacy = "BOOLEAN"
        case "FLOAT64":
                legacy = "FLOAT"
        case "INT64":
                legacy = "INTEGER"
        }
        return legacy
}

// schemaToEngine converts a gRPC TableSchema into the engine's TableSchema,
// converting each field with engineFieldFromProto. A nil schema yields nil.
func schemaToEngine(s *bigquerypb.TableSchema) *enginepb.TableSchema {
        if s == nil {
                return nil
        }
        protoFields := s.GetFields()
        converted := make([]*enginepb.FieldSchema, len(protoFields))
        for idx, field := range protoFields {
                converted[idx] = engineFieldFromProto(field)
        }
        return &enginepb.TableSchema{Fields: converted}
}

// engineFieldFromProto converts one gRPC field schema, and recursively its
// nested fields, into the engine's FieldSchema. Name, type, mode, and
// description are copied unchanged.
func engineFieldFromProto(f *bigquerypb.TableFieldSchema) *enginepb.FieldSchema {
        field := &enginepb.FieldSchema{
                Name:        f.GetName(),
                Type:        f.GetType(),
                Mode:        f.GetMode(),
                Description: f.GetDescription().GetValue(),
        }
        nested := f.GetFields()
        for idx := range nested {
                field.Fields = append(field.Fields, engineFieldFromProto(nested[idx]))
        }
        return field
}

// jobReferenceToProto converts a REST job reference into the gRPC
// JobReference. Location is set only when non-empty.
func jobReferenceToProto(ref bqtypes.JobReference) *bigquerypb.JobReference {
        var location *wrapperspb.StringValue
        if ref.Location != "" {
                location = wrapperspb.String(ref.Location)
        }
        return &bigquerypb.JobReference{
                ProjectId: ref.ProjectID,
                JobId:     ref.JobID,
                Location:  location,
        }
}

// jobListEntryToProto builds the list-format entry for a job: its id,
// reference, state (both top-level and inside Status), and the creation,
// start, and end times parsed by parseMillis. The job type is included only
// when the job has a configuration. A nil job yields nil.
func jobListEntryToProto(j *jobs.Job) *bigquerypb.ListFormatJob {
        if j == nil {
                return nil
        }

        state := j.Status.State
        stats := &bigquerypb.JobStatistics{
                CreationTime: parseMillis(j.Statistics.CreationTime),
                StartTime:    parseMillis(j.Statistics.StartTime),
                EndTime:      parseMillis(j.Statistics.EndTime),
        }
        entry := &bigquerypb.ListFormatJob{
                Kind:         jobKind,
                Id:           j.ID,
                JobReference: jobReferenceToProto(j.JobReference),
                State:        state,
                Status:       &bigquerypb.JobStatus{State: state},
                Statistics:   stats,
        }
        if cfg := j.Configuration; cfg != nil {
                entry.Configuration = &bigquerypb.JobConfiguration{JobType: cfg.JobType}
        }
        return entry
}

// routineFromREST converts a REST-shaped routine into the gRPC Routine
// message for projectID/datasetID/routineID. A creation or last-modified
// time that parseMillis reports as 0 is replaced by the current time, and an
// empty etag is replaced by a freshly minted one.
func routineFromREST(projectID, datasetID, routineID string, rt bqtypes.Routine) *bigquerypb.Routine {
        created := parseMillis(rt.CreationTime)
        if created == 0 {
                created = nowMillis()
        }
        modified := parseMillis(rt.LastModifiedTime)
        if modified == 0 {
                modified = nowMillis()
        }

        reference := &bigquerypb.RoutineReference{
                ProjectId: projectID,
                DatasetId: datasetID,
                RoutineId: routineID,
        }
        routine := &bigquerypb.Routine{
                Etag:             rt.Etag,
                RoutineReference: reference,
                RoutineType:      routineTypeToProto(string(rt.RoutineType)),
                Language:         routineLanguageToProto(string(rt.Language)),
                DefinitionBody:   rt.DefinitionBody,
                CreationTime:     created,
                LastModifiedTime: modified,
        }
        if routine.Etag == "" {
                routine.Etag = routines.MintEtag()
        }
        return routine
}

// routineToREST converts a gRPC Routine message into the REST-shaped
// routine. A nil message yields the zero Routine, and the routine reference
// is filled in only when the message has one.
func routineToREST(rt *bigquerypb.Routine) bqtypes.Routine {
        if rt == nil {
                return bqtypes.Routine{}
        }

        var reference bqtypes.RoutineReference
        if ref := rt.GetRoutineReference(); ref != nil {
                reference = bqtypes.RoutineReference{
                        ProjectID: ref.GetProjectId(),
                        DatasetID: ref.GetDatasetId(),
                        RoutineID: ref.GetRoutineId(),
                }
        }

        return bqtypes.Routine{
                Etag:             rt.GetEtag(),
                RoutineReference: reference,
                RoutineType:      bqtypes.RoutineType(routineTypeFromProto(rt.GetRoutineType())),
                Language:         bqtypes.RoutineLanguage(routineLanguageFromProto(rt.GetLanguage())),
                DefinitionBody:   rt.GetDefinitionBody(),
                CreationTime:     formatMillis(rt.GetCreationTime()),
                LastModifiedTime: formatMillis(rt.GetLastModifiedTime()),
        }
}

// routineTypeToProto maps a REST routine type name to the gRPC enum.
// Matching ignores letter case and surrounding whitespace; anything other
// than PROCEDURE or TABLE_VALUED_FUNCTION maps to SCALAR_FUNCTION.
func routineTypeToProto(s string) bigquerypb.Routine_RoutineType {
        kind := bigquerypb.Routine_SCALAR_FUNCTION
        switch strings.ToUpper(strings.TrimSpace(s)) {
        case "TABLE_VALUED_FUNCTION":
                kind = bigquerypb.Routine_TABLE_VALUED_FUNCTION
        case "PROCEDURE":
                kind = bigquerypb.Routine_PROCEDURE
        }
        return kind
}

// routineTypeFromProto renders the protobuf routine-type enum as its REST
// string. Every value other than PROCEDURE and TABLE_VALUED_FUNCTION is
// reported as "SCALAR_FUNCTION".
func routineTypeFromProto(t bigquerypb.Routine_RoutineType) string {
        if t == bigquerypb.Routine_PROCEDURE {
                return "PROCEDURE"
        }
        if t == bigquerypb.Routine_TABLE_VALUED_FUNCTION {
                return "TABLE_VALUED_FUNCTION"
        }
        return "SCALAR_FUNCTION"
}

// routineLanguageToProto maps a REST language string onto the protobuf enum.
// Only a case-insensitive "JAVASCRIPT" selects JavaScript; every other input
// (including the empty string) is treated as SQL.
func routineLanguageToProto(s string) bigquerypb.Routine_Language {
        switch {
        case strings.EqualFold(s, "JAVASCRIPT"):
                return bigquerypb.Routine_JAVASCRIPT
        default:
                return bigquerypb.Routine_SQL
        }
}

// routineLanguageFromProto renders the protobuf language enum as its REST
// string: "JAVASCRIPT" for the JavaScript value, "SQL" for everything else.
func routineLanguageFromProto(l bigquerypb.Routine_Language) string {
        switch l {
        case bigquerypb.Routine_JAVASCRIPT:
                return "JAVASCRIPT"
        default:
                return "SQL"
        }
}

// parseInt64 parses s as a base-10 signed 64-bit integer. Any parse failure
// (empty input, non-digits, out-of-range values) yields 0.
func parseInt64(s string) int64 {
        if parsed, err := strconv.ParseInt(s, 10, 64); err == nil {
                return parsed
        }
        return 0
}

// formatInt64 renders n as a base-10 string; zero is rendered as "0".
func formatInt64(n int64) string {
        return strconv.FormatInt(n, 10)
}

// formatUInt64 renders n as a base-10 string; zero is rendered as "0".
func formatUInt64(n uint64) string {
        return strconv.FormatUint(n, 10)
}

package bqv2grpc

import (
        "context"

        "cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "google.golang.org/protobuf/types/known/emptypb"
)

// DatasetServer implements google.cloud.bigquery.v2.DatasetService.
// It embeds the generated unimplemented server, so any RPC without an explicit
// method on this type is served by that embedded implementation.
type DatasetServer struct {
        bigquerypb.UnimplementedDatasetServiceServer
        // deps carries the gateway dependencies the RPC methods use (the
        // engine catalog client and the metadata overlay store).
        deps handlers.Dependencies
}

// newDatasetServer returns a DatasetServer that serves requests using deps.
func newDatasetServer(deps handlers.Dependencies) *DatasetServer {
        srv := new(DatasetServer)
        srv.deps = deps
        return srv
}

// ListDatasets returns the datasets the engine catalog reports for the
// request's project, attaching labels from the metadata overlay when one is
// stored. Without a catalog it answers with an empty listing.
func (s *DatasetServer) ListDatasets(
        ctx context.Context,
        req *bigquerypb.ListDatasetsRequest,
) (*bigquerypb.DatasetList, error) {
        const kind = "bigquery#datasetList"
        if s.deps.Catalog == nil {
                empty := &bigquerypb.DatasetList{
                        Kind:     kind,
                        Datasets: []*bigquerypb.ListFormatDataset{},
                }
                return empty, nil
        }

        engineReq := &enginepb.ListDatasetsRequest{ProjectId: req.GetProjectId()}
        resp, err := s.deps.Catalog.ListDatasets(ctx, engineReq)
        if err != nil {
                return nil, grpcStatusFromEngine(err)
        }

        refs := resp.GetDatasets()
        datasets := make([]*bigquerypb.ListFormatDataset, 0, len(refs))
        for _, ref := range refs {
                pid, did := ref.GetProjectId(), ref.GetDatasetId()
                // Default to an empty label map; use the overlay's labels when present.
                labels := map[string]string{}
                overlay, found := s.deps.Metadata.GetDataset(pid, did)
                if found && overlay.Labels != nil {
                        labels = map[string]string(overlay.Labels)
                }
                datasets = append(datasets, listDatasetFromRef(pid, did, labels))
        }
        return &bigquerypb.DatasetList{Kind: kind, Datasets: datasets}, nil
}

// InsertDataset registers a dataset with the engine catalog and stores the
// submitted metadata in the overlay store. It rejects requests without a
// dataset ID (InvalidArgument) and cannot run without a catalog
// (Unimplemented). The location sent to the engine defaults to "US".
func (s *DatasetServer) InsertDataset(
        ctx context.Context,
        req *bigquerypb.InsertDatasetRequest,
) (*bigquerypb.Dataset, error) {
        rest := datasetToREST(req.GetDataset())
        id := rest.DatasetReference.DatasetID
        switch {
        case id == "":
                return nil, invalidArg("datasetReference.datasetId is required")
        case s.deps.Catalog == nil:
                return nil, unimplemented("dataset insert requires an engine")
        }

        project := req.GetProjectId()
        // The default applies only to the engine registration; the stored
        // metadata keeps whatever location the caller supplied.
        region := rest.Location
        if region == "" {
                region = "US"
        }
        registration := &enginepb.RegisterDatasetRequest{
                Dataset:  &enginepb.DatasetRef{ProjectId: project, DatasetId: id},
                Location: region,
        }
        if _, err := s.deps.Catalog.RegisterDataset(ctx, registration); err != nil {
                return nil, grpcStatusFromEngine(err)
        }
        s.deps.Metadata.PutDataset(project, id, rest)
        return datasetFromREST(project, id, rest), nil
}

// GetDataset returns the metadata for one dataset. The dataset must exist
// according to catalogDatasetExists, otherwise NotFound is returned. The
// response is built from the overlay entry when one is stored, and from an
// empty dataset otherwise.
func (s *DatasetServer) GetDataset(
        ctx context.Context,
        req *bigquerypb.GetDatasetRequest,
) (*bigquerypb.Dataset, error) {
        project, id := req.GetProjectId(), req.GetDatasetId()

        found, err := catalogDatasetExists(ctx, s.deps, project, id)
        switch {
        case err != nil:
                return nil, grpcStatusFromEngine(err)
        case !found:
                return nil, datasetNotFound(project, id)
        }

        rest := datasetToREST(&bigquerypb.Dataset{})
        if stored, ok := s.deps.Metadata.GetDataset(project, id); ok {
                rest = stored
        }
        return datasetFromREST(project, id, rest), nil
}

// UpdateDataset overwrites the stored metadata for an existing dataset with
// the request body. It returns NotFound when the dataset does not exist.
func (s *DatasetServer) UpdateDataset(
        ctx context.Context,
        req *bigquerypb.UpdateOrPatchDatasetRequest,
) (*bigquerypb.Dataset, error) {
        project, id := req.GetProjectId(), req.GetDatasetId()

        found, err := catalogDatasetExists(ctx, s.deps, project, id)
        switch {
        case err != nil:
                return nil, grpcStatusFromEngine(err)
        case !found:
                return nil, datasetNotFound(project, id)
        }

        replacement := datasetToREST(req.GetDataset())
        s.deps.Metadata.PutDataset(project, id, replacement)
        return datasetFromREST(project, id, replacement), nil
}

// PatchDataset merges the request body into the stored metadata of an
// existing dataset and returns the merged result read back from the store.
// It returns NotFound when the dataset does not exist.
func (s *DatasetServer) PatchDataset(
        ctx context.Context,
        req *bigquerypb.UpdateOrPatchDatasetRequest,
) (*bigquerypb.Dataset, error) {
        project, id := req.GetProjectId(), req.GetDatasetId()

        found, err := catalogDatasetExists(ctx, s.deps, project, id)
        switch {
        case err != nil:
                return nil, grpcStatusFromEngine(err)
        case !found:
                return nil, datasetNotFound(project, id)
        }

        result := datasetToREST(req.GetDataset())
        s.deps.Metadata.MergeDataset(project, id, result)
        // Prefer the merged entry; fall back to the patch itself if the
        // store has nothing to return.
        if merged, ok := s.deps.Metadata.GetDataset(project, id); ok {
                result = merged
        }
        return datasetFromREST(project, id, result), nil
}

// DeleteDataset drops a dataset from the engine catalog and then removes its
// overlay metadata. When the request asks to delete contents, the overlay
// entries for the dataset's tables are removed as well. It cannot run
// without a catalog (Unimplemented).
func (s *DatasetServer) DeleteDataset(
        ctx context.Context,
        req *bigquerypb.DeleteDatasetRequest,
) (*emptypb.Empty, error) {
        if s.deps.Catalog == nil {
                return nil, unimplemented("dataset delete requires an engine")
        }

        project, id := req.GetProjectId(), req.GetDatasetId()
        purge := req.GetDeleteContents()
        drop := &enginepb.DropDatasetRequest{
                Dataset:        &enginepb.DatasetRef{ProjectId: project, DatasetId: id},
                DeleteContents: purge,
        }
        if _, err := s.deps.Catalog.DropDataset(ctx, drop); err != nil {
                return nil, grpcStatusFromEngine(err)
        }

        s.deps.Metadata.DeleteDataset(project, id)
        if purge {
                s.deps.Metadata.DeleteTablesInDataset(project, id)
        }
        return &emptypb.Empty{}, nil
}

// catalogDatasetExists reports whether datasetID appears in the engine
// catalog's dataset listing for projectID. With no catalog configured it
// reports true. A listing failure is returned unchanged alongside false.
func catalogDatasetExists(
        ctx context.Context,
        deps handlers.Dependencies,
        projectID, datasetID string,
) (bool, error) {
        if deps.Catalog == nil {
                return true, nil
        }
        listing, err := deps.Catalog.ListDatasets(ctx, &enginepb.ListDatasetsRequest{
                ProjectId: projectID,
        })
        if err != nil {
                return false, err
        }
        known := listing.GetDatasets()
        for i := range known {
                if known[i].GetDatasetId() == datasetID {
                        return true, nil
                }
        }
        return false, nil
}

package bqv2grpc

import (
        "regexp"
        "strconv"

        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

// Patterns for engine error messages that name a missing or duplicate
// resource. Both capture, in order: the resource noun (table or dataset),
// the project, the dataset, and an optional table segment.
var (
        // notFoundResourceRE matches "<table|dataset> not found: a.b[.c]".
        notFoundResourceRE = regexp.MustCompile(
                `^(table|dataset) not found: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
        // alreadyExistsResourceRE matches "<table|dataset> already exists: a.b[.c]".
        alreadyExistsResourceRE = regexp.MustCompile(
                `^(table|dataset) already exists: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
)

// grpcStatusFromEngine translates an error returned by an engine RPC into the
// status handed back to the client. Errors that carry no gRPC status become
// Internal with an "Engine RPC failed" prefix. A fixed set of codes is passed
// through unchanged; every other code is reported as Internal. In both of
// those cases the message is rewritten by bqStyleMessage.
func grpcStatusFromEngine(err error) error {
        if err == nil {
                return nil
        }
        st, ok := status.FromError(err)
        if !ok {
                return status.Errorf(codes.Internal, "Engine RPC failed: %v", err)
        }

        code := st.Code()
        if code == codes.OK {
                return nil
        }
        msg := bqStyleMessage(st.Message())
        switch code {
        case codes.NotFound, codes.AlreadyExists, codes.InvalidArgument,
                codes.FailedPrecondition, codes.PermissionDenied, codes.Unauthenticated,
                codes.Unimplemented, codes.Unavailable, codes.DeadlineExceeded,
                codes.ResourceExhausted:
                return status.Error(code, msg)
        }
        return status.Errorf(codes.Internal, "%s", msg)
}

// bqStyleMessage rewrites engine "not found" / "already exists" resource
// messages via bqStyleResourceMessage. Messages matching neither pattern are
// returned unchanged.
func bqStyleMessage(msg string) string {
        // The not-found pattern is tried first, then already-exists.
        rules := [...]struct {
                pattern *regexp.Regexp
                verb    string
        }{
                {notFoundResourceRE, "Not found"},
                {alreadyExistsResourceRE, "Already Exists"},
        }
        for _, rule := range rules {
                groups := rule.pattern.FindStringSubmatch(msg)
                if groups == nil {
                        continue
                }
                return bqStyleResourceMessage(rule.verb, groups[1], groups[2], groups[3], groups[4])
        }
        return msg
}

// bqStyleResourceMessage formats "<verb>: <Noun> <project>:<dataset>[.<table>]".
// The nouns "table" and "dataset" are capitalised; any other noun is emitted
// as given. The ".<table>" suffix is added only when table is non-empty.
func bqStyleResourceMessage(verb, noun, project, dataset, table string) string {
        label := noun
        switch noun {
        case "table":
                label = "Table"
        case "dataset":
                label = "Dataset"
        }

        path := project + ":" + dataset
        if table != "" {
                path += "." + table
        }
        return verb + ": " + label + " " + path
}

// datasetNotFound builds the NotFound status for a missing dataset, with the
// message "Not found: Dataset <project>:<dataset>".
func datasetNotFound(projectID, datasetID string) error {
        const format = "Not found: Dataset %s:%s"
        return status.Errorf(codes.NotFound, format, projectID, datasetID)
}

// routineNotFound builds the NotFound status for a missing routine, with the
// message "Not found: Routine <project>:<dataset>.<routine>".
func routineNotFound(projectID, datasetID, routineID string) error {
        const format = "Not found: Routine %s:%s.%s"
        return status.Errorf(codes.NotFound, format, projectID, datasetID, routineID)
}

// invalidArg wraps msg in a gRPC InvalidArgument status error.
func invalidArg(msg string) error {
        err := status.Error(codes.InvalidArgument, msg)
        return err
}

// unimplemented wraps msg in a gRPC Unimplemented status error.
func unimplemented(msg string) error {
        err := status.Error(codes.Unimplemented, msg)
        return err
}

// parseMillis parses a base-10 timestamp string into an int64. Input that
// does not parse (including the empty string) yields 0.
func parseMillis(s string) int64 {
        millis, err := strconv.ParseInt(s, 10, 64)
        if err == nil {
                return millis
        }
        return 0
}

// formatMillis renders n as a base-10 string. Zero is rendered as the empty
// string rather than "0".
func formatMillis(n int64) string {
        if n != 0 {
                return strconv.FormatInt(n, 10)
        }
        return ""
}

package bqv2grpc

import (
        "context"

        "cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
)

// JobServer implements google.cloud.bigquery.v2.JobService (ListJobs only).
// All other RPCs are served by the embedded unimplemented server.
type JobServer struct {
        bigquerypb.UnimplementedJobServiceServer
        // deps carries the gateway dependencies; ListJobs reads the job
        // registry from it.
        deps handlers.Dependencies
}

// newJobServer returns a JobServer using deps. When deps carries no job
// registry, the server's own copy of deps gets a fresh one.
func newJobServer(deps handlers.Dependencies) *JobServer {
        srv := &JobServer{deps: deps}
        if srv.deps.Jobs == nil {
                srv.deps.Jobs = jobs.NewRegistry()
        }
        return srv
}

// ListJobs lists the jobs recorded for the request's project in the job
// registry, honouring the paging, parent-job, creation-time and state filters
// from the request. Requests with allUsers=true are rejected as Unimplemented.
func (s *JobServer) ListJobs(
        _ context.Context,
        req *bigquerypb.ListJobsRequest,
) (*bigquerypb.JobList, error) {
        if req.GetAllUsers() {
                return nil, unimplemented(
                        "jobs.list with allUsers=true is not supported; " +
                                "the emulator has no auth context to scope cross-user listings.")
        }

        var opts jobs.ListOptions
        opts.MaxResults = int(req.GetMaxResults().GetValue())
        opts.PageToken = req.GetPageToken()
        opts.ParentJobID = req.GetParentJobId()
        // Creation-time bounds arrive unsigned; clamp them into int64 range.
        opts.MinCreationTime = int64FromUint64(req.GetMinCreationTime())
        opts.MaxCreationTime = int64FromUint64(req.GetMaxCreationTime().GetValue())
        opts.StateFilter = stateFiltersFromProto(req.GetStateFilter())

        found, next := s.deps.Jobs.ListByProject(req.GetProjectId(), opts)
        entries := make([]*bigquerypb.ListFormatJob, 0, len(found))
        for _, job := range found {
                entries = append(entries, jobListEntryToProto(job))
        }
        return &bigquerypb.JobList{
                Kind:          "bigquery#jobList",
                Jobs:          entries,
                NextPageToken: next,
        }, nil
}

// stateFiltersFromProto converts protobuf state filters into their lowercase
// names ("pending", "running", "done"), preserving order. Values outside that
// set are dropped. An empty input yields nil.
func stateFiltersFromProto(filters []bigquerypb.ListJobsRequest_StateFilter) []string {
        if len(filters) == 0 {
                return nil
        }
        names := map[bigquerypb.ListJobsRequest_StateFilter]string{
                bigquerypb.ListJobsRequest_PENDING: "pending",
                bigquerypb.ListJobsRequest_RUNNING: "running",
                bigquerypb.ListJobsRequest_DONE:    "done",
        }
        converted := make([]string, 0, len(filters))
        for _, filter := range filters {
                if name, known := names[filter]; known {
                        converted = append(converted, name)
                }
        }
        return converted
}

package bqv2grpc

import "math"

// uint64FromNonNegativeInt64 converts v to uint64, mapping negative values
// to 0 instead of letting them wrap around.
func uint64FromNonNegativeInt64(v int64) uint64 {
        if v >= 0 {
                return uint64(v)
        }
        return 0
}

// int64FromUint64 converts v to int64, saturating at math.MaxInt64 for values
// that do not fit.
func int64FromUint64(v uint64) int64 {
        const limit = uint64(math.MaxInt64)
        if v <= limit {
                return int64(v)
        }
        return math.MaxInt64
}

// int32FromInt converts v to int32, saturating at math.MaxInt32 and
// math.MinInt32 for values outside the int32 range.
func int32FromInt(v int) int32 {
        switch {
        case v > int(math.MaxInt32):
                return math.MaxInt32
        case v < int(math.MinInt32):
                return math.MinInt32
        default:
                return int32(v)
        }
}

package bqv2grpc

import (
        "context"

        "cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
)

// ProjectServer implements google.cloud.bigquery.v2.ProjectService.
// It holds no state; RPCs without an explicit method on this type are served
// by the embedded unimplemented server.
type ProjectServer struct {
        bigquerypb.UnimplementedProjectServiceServer
}

// newProjectServer returns a ProjectServer. The dependencies argument is
// accepted for symmetry with the other constructors and is not used.
func newProjectServer(_ handlers.Dependencies) *ProjectServer {
        return new(ProjectServer)
}

// GetServiceAccount returns a synthetic service account email of the form
// "bigquery-emulator@<project>.iam.gserviceaccount.com". An empty project ID
// in the request is replaced by "test-project".
func (s *ProjectServer) GetServiceAccount(
        _ context.Context,
        req *bigquerypb.GetServiceAccountRequest,
) (*bigquerypb.GetServiceAccountResponse, error) {
        project := "test-project"
        if requested := req.GetProjectId(); requested != "" {
                project = requested
        }
        resp := &bigquerypb.GetServiceAccountResponse{
                Kind:  "bigquery#getServiceAccountResponse",
                Email: "bigquery-emulator@" + project + ".iam.gserviceaccount.com",
        }
        return resp, nil
}

package bqv2grpc

import (
        "cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "google.golang.org/grpc"
)

// RegisterGRPC registers every BigQuery v2 gRPC service on srv. A nil srv is
// a no-op. Methods that a server type does not implement itself are answered
// with UNIMPLEMENTED by the embedded generated server.
func RegisterGRPC(srv grpc.ServiceRegistrar, deps handlers.Dependencies) {
        if srv == nil {
                return
        }

        // Services backed by gateway dependencies.
        datasets := newDatasetServer(deps)
        bigquerypb.RegisterDatasetServiceServer(srv, datasets)
        tables := newTableServer(deps)
        bigquerypb.RegisterTableServiceServer(srv, tables)
        jobsSrv := newJobServer(deps)
        bigquerypb.RegisterJobServiceServer(srv, jobsSrv)
        projects := newProjectServer(deps)
        bigquerypb.RegisterProjectServiceServer(srv, projects)
        routinesSrv := newRoutineServer(deps)
        bigquerypb.RegisterRoutineServiceServer(srv, routinesSrv)

        // Pure stubs: every method is unimplemented.
        bigquerypb.RegisterModelServiceServer(srv, &ModelServer{})
        bigquerypb.RegisterRowAccessPolicyServiceServer(srv, &RowAccessPolicyServer{})
}

// ModelServer stubs google.cloud.bigquery.v2.ModelService. It defines no
// methods of its own, so every RPC is served by the embedded unimplemented
// server.
type ModelServer struct {
        bigquerypb.UnimplementedModelServiceServer
}

// RowAccessPolicyServer stubs google.cloud.bigquery.v2.RowAccessPolicyService.
// It defines no methods of its own, so every RPC is served by the embedded
// unimplemented server.
type RowAccessPolicyServer struct {
        bigquerypb.UnimplementedRowAccessPolicyServiceServer
}

package bqv2grpc

import (
        "context"

        "cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "github.com/vantaboard/bigquery-emulator/gateway/routines"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
        "google.golang.org/protobuf/types/known/emptypb"
)

// RoutineServer implements google.cloud.bigquery.v2.RoutineService on top of
// the routines store held in its dependencies.
type RoutineServer struct {
        bigquerypb.UnimplementedRoutineServiceServer
        // deps carries the gateway dependencies; the routine store is read
        // from (and lazily created in) deps.Routines.
        deps handlers.Dependencies
}

// newRoutineServer returns a RoutineServer that serves requests using deps.
func newRoutineServer(deps handlers.Dependencies) *RoutineServer {
        srv := new(RoutineServer)
        srv.deps = deps
        return srv
}

// routineStore returns the routine store from the server's dependencies,
// creating and remembering a new one on first use when none was supplied.
// NOTE(review): the lazy creation is not guarded by any lock here — confirm
// whether callers can reach it concurrently with a nil store.
func (s *RoutineServer) routineStore() *routines.Store {
        if existing := s.deps.Routines; existing != nil {
                return existing
        }
        s.deps.Routines = routines.NewStore()
        return s.deps.Routines
}

// ListRoutines returns the routines the store holds for the request's project
// and dataset, passing the request's filter string to the store.
func (s *RoutineServer) ListRoutines(
        _ context.Context,
        req *bigquerypb.ListRoutinesRequest,
) (*bigquerypb.ListRoutinesResponse, error) {
        stored := s.routineStore().List(req.GetProjectId(), req.GetDatasetId(), req.GetFilter())

        resp := &bigquerypb.ListRoutinesResponse{
                Routines: make([]*bigquerypb.Routine, 0, len(stored)),
        }
        for _, rt := range stored {
                // Identify each entry by its own stored reference rather than
                // by the request's IDs.
                ref := rt.RoutineReference
                resp.Routines = append(resp.Routines,
                        routineFromREST(ref.ProjectID, ref.DatasetID, ref.RoutineID, rt))
        }
        return resp, nil
}

// GetRoutine looks up one routine in the store and returns it, or NotFound
// when the store has no such routine.
func (s *RoutineServer) GetRoutine(
        _ context.Context,
        req *bigquerypb.GetRoutineRequest,
) (*bigquerypb.Routine, error) {
        project, dataset, routine := req.GetProjectId(), req.GetDatasetId(), req.GetRoutineId()

        stored, found := s.routineStore().Get(project, dataset, routine)
        if found {
                return routineFromREST(project, dataset, routine, stored), nil
        }
        return nil, routineNotFound(project, dataset, routine)
}

// InsertRoutine stores a new routine. The routine ID and definition body are
// required (InvalidArgument otherwise). An empty routine type or language is
// defaulted to SCALAR_FUNCTION / SQL. Inserting an ID the store already holds
// yields AlreadyExists.
func (s *RoutineServer) InsertRoutine(
        _ context.Context,
        req *bigquerypb.InsertRoutineRequest,
) (*bigquerypb.Routine, error) {
        project, dataset := req.GetProjectId(), req.GetDatasetId()
        submitted := routineToREST(req.GetRoutine())
        routine := submitted.RoutineReference.RoutineID

        switch {
        case routine == "":
                return nil, invalidArg("Required routineReference.routineId is missing.")
        case submitted.DefinitionBody == "":
                return nil, invalidArg("Required definitionBody is missing.")
        }
        if submitted.RoutineType == "" {
                submitted.RoutineType = "SCALAR_FUNCTION"
        }
        if submitted.Language == "" {
                submitted.Language = "SQL"
        }

        // Convert to proto and back, and store that round-tripped form.
        created := routineFromREST(project, dataset, routine, submitted)
        if inserted := s.routineStore().Insert(routineToREST(created)); !inserted {
                return nil, status.Errorf(codes.AlreadyExists,
                        "Already Exists: Routine %s:%s.%s", project, dataset, routine)
        }
        return created, nil
}

// UpdateRoutine replaces a stored routine with the request body. The original
// creation time is carried over and a new etag is minted. It returns NotFound
// when the routine is not in the store.
func (s *RoutineServer) UpdateRoutine(
        _ context.Context,
        req *bigquerypb.UpdateRoutineRequest,
) (*bigquerypb.Routine, error) {
        project, dataset, routine := req.GetProjectId(), req.GetDatasetId(), req.GetRoutineId()

        previous, found := s.routineStore().Get(project, dataset, routine)
        if !found {
                return nil, routineNotFound(project, dataset, routine)
        }

        updated := routineFromREST(project, dataset, routine, routineToREST(req.GetRoutine()))
        updated.CreationTime = parseMillis(previous.CreationTime)
        updated.Etag = routines.MintEtag()
        s.routineStore().Upsert(routineToREST(updated))
        return updated, nil
}

// DeleteRoutine removes a routine from the store, returning NotFound when the
// store reports that nothing was deleted.
func (s *RoutineServer) DeleteRoutine(
        _ context.Context,
        req *bigquerypb.DeleteRoutineRequest,
) (*emptypb.Empty, error) {
        project, dataset, routine := req.GetProjectId(), req.GetDatasetId(), req.GetRoutineId()

        if removed := s.routineStore().Delete(project, dataset, routine); removed {
                return &emptypb.Empty{}, nil
        }
        return nil, routineNotFound(project, dataset, routine)
}

package bqv2grpc

import (
        "context"
        "strconv"

        "cloud.google.com/go/bigquery/v2/apiv2/bigquerypb"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "google.golang.org/protobuf/types/known/emptypb"
        "google.golang.org/protobuf/types/known/wrapperspb"
)

// TableServer implements google.cloud.bigquery.v2.TableService.
// RPCs without an explicit method on this type are served by the embedded
// unimplemented server.
type TableServer struct {
        bigquerypb.UnimplementedTableServiceServer
        // deps carries the gateway dependencies the RPC methods use (engine
        // catalog client, metadata overlay store, optional snapshots).
        deps handlers.Dependencies
}

// newTableServer returns a TableServer that serves requests using deps.
func newTableServer(deps handlers.Dependencies) *TableServer {
        srv := new(TableServer)
        srv.deps = deps
        return srv
}

// ListTables returns the tables the engine catalog reports for a dataset.
// Labels and table type come from the metadata overlay when an entry exists;
// otherwise the type reported by the engine is used when non-empty, falling
// back to the default table type. Without a catalog it answers with an empty
// listing.
func (s *TableServer) ListTables(
        ctx context.Context,
        req *bigquerypb.ListTablesRequest,
) (*bigquerypb.TableList, error) {
        const kind = "bigquery#tableList"
        if s.deps.Catalog == nil {
                empty := &bigquerypb.TableList{
                        Kind:       kind,
                        Tables:     []*bigquerypb.ListFormatTable{},
                        TotalItems: wrapperspb.Int32(0),
                }
                return empty, nil
        }

        engineReq := &enginepb.ListTablesRequest{
                Dataset: &enginepb.DatasetRef{
                        ProjectId: req.GetProjectId(),
                        DatasetId: req.GetDatasetId(),
                },
        }
        resp, err := s.deps.Catalog.ListTables(ctx, engineReq)
        if err != nil {
                return nil, grpcStatusFromEngine(err)
        }

        refs := resp.GetTables()
        tables := make([]*bigquerypb.ListFormatTable, 0, len(refs))
        for _, ref := range refs {
                pid, did, tid := ref.GetProjectId(), ref.GetDatasetId(), ref.GetTableId()
                labels := map[string]string{}
                tableType := tableTypeTable

                overlay, found := s.deps.Metadata.GetTable(pid, did, tid)
                switch {
                case found:
                        // An overlay entry wins; the engine's type is not consulted.
                        if overlay.Labels != nil {
                                labels = map[string]string(overlay.Labels)
                        }
                        if overlay.Type != "" {
                                tableType = overlay.Type
                        }
                case ref.GetTableType() != "":
                        tableType = ref.GetTableType()
                }
                tables = append(tables, listTableFromRef(pid, did, tid, tableType, labels))
        }
        return &bigquerypb.TableList{
                Kind:       kind,
                Tables:     tables,
                TotalItems: wrapperspb.Int32(int32FromInt(len(tables))),
        }, nil
}

// InsertTable registers a table with the engine catalog and stores the
// submitted metadata in the overlay store.
//
// The table ID is required (InvalidArgument otherwise) and a catalog must be
// configured (Unimplemented otherwise). Engine failures are translated by
// grpcStatusFromEngine. When the table declares a default collation it is
// applied to the schema's string fields before the metadata is stored. When a
// snapshot tracker is configured, the creation time is recorded with it.
func (s *TableServer) InsertTable(
        ctx context.Context,
        req *bigquerypb.InsertTableRequest,
) (*bigquerypb.Table, error) {
        projectID := req.GetProjectId()
        datasetID := req.GetDatasetId()
        t := tableToREST(req.GetTable())
        tableID := t.TableReference.TableID
        if tableID == "" {
                return nil, invalidArg("tableReference.tableId is required")
        }
        if s.deps.Catalog == nil {
                return nil, unimplemented("table insert requires an engine")
        }
        _, err := s.deps.Catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{
                Table: &enginepb.TableRef{
                        ProjectId: projectID,
                        DatasetId: datasetID,
                        TableId:   tableID,
                },
                Schema: schemaToEngine(req.GetTable().GetSchema()),
        })
        if err != nil {
                return nil, grpcStatusFromEngine(err)
        }
        if t.DefaultCollation != "" {
                t.Schema = bqtypes.ApplyDefaultCollationToStringFields(t.Schema, t.DefaultCollation)
        }
        s.deps.Metadata.PutTable(projectID, datasetID, tableID, t)
        if s.deps.Snapshots != nil {
                // Pass the timestamp directly: formatting it to a string and
                // parsing it back yields the same value and cannot fail.
                s.deps.Snapshots.RecordCreation(projectID, datasetID, tableID, nowMillis())
        }
        return tableFromREST(projectID, datasetID, tableID, t), nil
}

// GetTable returns one table's metadata. The schema comes from the engine's
// DescribeTable, overlay metadata is layered on top when present, the
// creation time comes from the snapshot tracker when it has one, and the row
// count comes from a ListRows call. If that ListRows call fails, an unset row
// count defaults to "0". Without a catalog an empty table is returned for the
// requested IDs.
func (s *TableServer) GetTable(
        ctx context.Context,
        req *bigquerypb.GetTableRequest,
) (*bigquerypb.Table, error) {
        projectID, datasetID, tableID := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()
        if s.deps.Catalog == nil {
                return tableFromREST(projectID, datasetID, tableID, bqtypes.Table{}), nil
        }

        // newRef builds a fresh table reference for each engine call.
        newRef := func() *enginepb.TableRef {
                return &enginepb.TableRef{
                        ProjectId: projectID,
                        DatasetId: datasetID,
                        TableId:   tableID,
                }
        }

        described, err := s.deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{
                Table: newRef(),
        })
        if err != nil {
                return nil, grpcStatusFromEngine(err)
        }

        table := bqtypes.Table{
                Schema: schemaFromProto(schemaToEngineProto(described.GetSchema())),
        }
        if overlay, ok := s.deps.Metadata.GetTable(projectID, datasetID, tableID); ok {
                table = applyTableOverlay(table, overlay)
        }
        if s.deps.Snapshots != nil {
                createdMs, ok := s.deps.Snapshots.CreationTimeMs(projectID, datasetID, tableID)
                if ok {
                        table.CreationTime = strconv.FormatInt(createdMs, 10)
                }
        }

        rows, rowsErr := s.deps.Catalog.ListRows(ctx, &enginepb.ListRowsRequest{
                Table:      newRef(),
                StartIndex: 0,
                MaxResults: 0,
        })
        switch {
        case rowsErr == nil:
                table.NumRows = strconv.FormatInt(rows.GetTotalRows(), 10)
        case table.NumRows == "":
                // Row listing failed and nothing else set a count.
                table.NumRows = "0"
        }
        return tableFromREST(projectID, datasetID, tableID, table), nil
}

// UpdateTable overwrites the stored overlay metadata for a table with the
// request body and echoes it back. The engine catalog is not consulted.
func (s *TableServer) UpdateTable(
        ctx context.Context,
        req *bigquerypb.UpdateOrPatchTableRequest,
) (*bigquerypb.Table, error) {
        project, dataset, table := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()
        replacement := tableToREST(req.GetTable())
        s.deps.Metadata.PutTable(project, dataset, table, replacement)
        return tableFromREST(project, dataset, table, replacement), nil
}

// PatchTable merges the request body into the stored overlay metadata for a
// table. The response is the request body with the merged overlay applied on
// top when the store returns one. The engine catalog is not consulted.
func (s *TableServer) PatchTable(
        ctx context.Context,
        req *bigquerypb.UpdateOrPatchTableRequest,
) (*bigquerypb.Table, error) {
        project, dataset, table := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()

        result := tableToREST(req.GetTable())
        s.deps.Metadata.MergeTable(project, dataset, table, result)
        merged, found := s.deps.Metadata.GetTable(project, dataset, table)
        if found {
                result = applyTableOverlay(result, merged)
        }
        return tableFromREST(project, dataset, table, result), nil
}

// DeleteTable drops a table from the engine catalog and removes its overlay
// metadata. When a snapshot tracker is configured it is asked to capture the
// table first. It cannot run without a catalog (Unimplemented).
func (s *TableServer) DeleteTable(
        ctx context.Context,
        req *bigquerypb.DeleteTableRequest,
) (*emptypb.Empty, error) {
        if s.deps.Catalog == nil {
                return nil, unimplemented("table delete requires an engine")
        }
        project, dataset, table := req.GetProjectId(), req.GetDatasetId(), req.GetTableId()

        if s.deps.Snapshots != nil {
                // The capture result is discarded: a failed capture does not
                // stop the delete.
                _ = s.deps.Snapshots.CaptureBeforeDelete(ctx, s.deps.Catalog,
                        project, dataset, table)
        }

        drop := &enginepb.DropTableRequest{
                Table: &enginepb.TableRef{
                        ProjectId: project,
                        DatasetId: dataset,
                        TableId:   table,
                },
        }
        if _, err := s.deps.Catalog.DropTable(ctx, drop); err != nil {
                return nil, grpcStatusFromEngine(err)
        }
        s.deps.Metadata.DeleteTable(project, dataset, table)
        return &emptypb.Empty{}, nil
}

// schemaToEngineProto converts an engine table schema into a BigQuery
// protobuf TableSchema, copying each field's name, type and mode. The
// description is set only when non-empty. A nil schema yields nil.
func schemaToEngineProto(s *enginepb.TableSchema) *bigquerypb.TableSchema {
        if s == nil {
                return nil
        }
        engineFields := s.GetFields()
        converted := make([]*bigquerypb.TableFieldSchema, 0, len(engineFields))
        for _, f := range engineFields {
                field := &bigquerypb.TableFieldSchema{
                        Name: f.GetName(),
                        Type: f.GetType(),
                        Mode: f.GetMode(),
                }
                if desc := f.GetDescription(); desc != "" {
                        field.Description = wrapperspb.String(desc)
                }
                converted = append(converted, field)
        }
        return &bigquerypb.TableSchema{Fields: converted}
}

// applyTableOverlay returns base with every field that is set on overlay
// copied over it. Only FriendlyName, Description, Type, Labels, Schema and
// DefaultCollation are considered; unset overlay fields leave base untouched.
func applyTableOverlay(base, overlay bqtypes.Table) bqtypes.Table {
        // String-valued fields: "set" means non-empty.
        if v := overlay.FriendlyName; v != "" {
                base.FriendlyName = v
        }
        if v := overlay.Description; v != "" {
                base.Description = v
        }
        if v := overlay.Type; v != "" {
                base.Type = v
        }
        if v := overlay.DefaultCollation; v != "" {
                base.DefaultCollation = v
        }
        // Reference-valued fields: "set" means non-nil.
        if overlay.Labels != nil {
                base.Labels = overlay.Labels
        }
        if overlay.Schema != nil {
                base.Schema = overlay.Schema
        }
        return base
}

package handlers

import (
        "context"
        "fmt"
        "net/http"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// catalogDatasetExists reports whether `datasetID` is registered in the
// engine catalog for `projectID`. When Catalog is nil (gateway-only unit
// tests) the function returns (true, nil) so handlers keep the legacy
// synthesized GET posture.
func catalogDatasetExists(
        ctx context.Context,
        deps Dependencies,
        projectID, datasetID string,
) (bool, error) {
        // Without a catalog there is nothing to consult; report "exists".
        if deps.Catalog == nil {
                return true, nil
        }
        req := &enginepb.ListDatasetsRequest{ProjectId: projectID}
        listing, err := deps.Catalog.ListDatasets(ctx, req)
        if err != nil {
                return false, err
        }
        // Linear scan of the project's datasets for an exact id match.
        found := false
        for _, d := range listing.GetDatasets() {
                if d.GetDatasetId() == datasetID {
                        found = true
                        break
                }
        }
        return found, nil
}

// writeDatasetNotFound writes the canonical BigQuery REST 404 for a
// missing dataset resource.
func writeDatasetNotFound(w http.ResponseWriter, projectID, datasetID string) {
        // Message format: "Not found: Dataset <project>:<dataset>".
        msg := fmt.Sprintf("Not found: Dataset %s:%s", projectID, datasetID)
        writeError(w, http.StatusNotFound, reasonNotFound, msg)
}

package handlers

import (
        "fmt"
        "net/http"
        "strings"
)

// headerEmulatorAPIRegion is sent by thirdparty harnesses when the
// Node/Go client uses a regional Google API hostname while the TCP
// connection targets loopback. See third_party/node-bigquery-tests/
// test/setup.js and third_party/golang-bigquery-tests/bqopts.
const headerEmulatorAPIRegion = "X-BigQuery-Emulator-Api-Region"

// datasetMultiRegions is the small multi-region set upstream samples
// exercise. See docs/bigquery/docs/locations.md.
var datasetMultiRegions = map[string]struct{}{
        "US": {},
        "EU": {},
}

// datasetRegions is a subset of supported single regions wide enough
// for thirdparty samples (us-east4, us-central1, europe-west1, ...).
// Multi-regions such as US and EU live in datasetMultiRegions instead.
var datasetRegions = map[string]struct{}{
        "africa-south1":           {},
        "asia-east1":              {},
        "asia-east2":              {},
        "asia-northeast1":         {},
        "asia-northeast2":         {},
        "asia-northeast3":         {},
        "asia-south1":             {},
        "asia-south2":             {},
        "asia-southeast1":         {},
        "asia-southeast2":         {},
        "australia-southeast1":    {},
        "australia-southeast2":    {},
        "europe-central2":         {},
        "europe-north1":           {},
        "europe-southwest1":       {},
        "europe-west1":            {},
        "europe-west10":           {},
        "europe-west12":           {},
        "europe-west2":            {},
        "europe-west3":            {},
        "europe-west4":            {},
        "europe-west6":            {},
        "europe-west8":            {},
        "europe-west9":            {},
        "me-central1":             {},
        "me-central2":             {},
        "me-west1":                {},
        "northamerica-northeast1": {},
        "northamerica-northeast2": {},
        "southamerica-east1":      {},
        "southamerica-west1":      {},
        "us-central1":             {},
        "us-east1":                {},
        "us-east4":                {},
        "us-east5":                {},
        "us-south1":               {},
        "us-west1":                {},
        "us-west2":                {},
        "us-west3":                {},
        "us-west4":                {},
}

func emulatorAPIRegion(r *http.Request) string {
        // The header value is trimmed and lower-cased; an absent header
        // therefore yields the empty string.
        raw := r.Header.Get(headerEmulatorAPIRegion)
        trimmed := strings.TrimSpace(raw)
        return strings.ToLower(trimmed)
}

// normalizeDatasetLocation canonicalizes a BigQuery dataset location
// string after trimming surrounding whitespace:
//
//   - a blank location defaults to "US";
//   - a multi-region listed in datasetMultiRegions is returned upper-cased;
//   - a single region listed in datasetRegions is returned lower-cased;
//   - anything else yields "" (not recognized).
func normalizeDatasetLocation(location string) string {
        trimmed := strings.TrimSpace(location)
        if trimmed == "" {
                return "US"
        }

        // Multi-regions are matched case-insensitively via their upper-case
        // form; only short (<= 3 byte) candidates are considered.
        upper := strings.ToUpper(trimmed)
        if _, isMulti := datasetMultiRegions[upper]; isMulti && len(upper) <= 3 {
                return upper
        }

        // Single regions are keyed by their lower-case name.
        lower := strings.ToLower(trimmed)
        if _, isRegion := datasetRegions[lower]; !isRegion {
                return ""
        }
        return lower
}

// locationMatchesAPIRegion enforces regional-endpoint parity exercised
// by node-bigquery-tests `should fail to create a dataset using a
// different region from the client endpoint`.
//
// It reports true when no API region was supplied (apiRegion == ""),
// or when normalizedLocation equals apiRegion ignoring case. The
// case-insensitive comparison already covers the multi-region pairing
// of an "eu" endpoint with a location normalized to "EU", so no
// separate special case is needed.
func locationMatchesAPIRegion(normalizedLocation, apiRegion string) bool {
        return apiRegion == "" || strings.EqualFold(normalizedLocation, apiRegion)
}

// validateDatasetLocation checks the dataset location before any
// engine RPC so invalid regions surface as "Invalid storage region"
// ahead of duplicate-id errors from RegisterDataset.
func validateDatasetLocation(r *http.Request, location string) error {
        // invalidRegion builds the error text shared by both failure modes.
        invalidRegion := func(region string) error {
                return fmt.Errorf("Invalid storage region: %s", region) //nolint:staticcheck // BigQuery client error text
        }

        trimmed := strings.TrimSpace(location)
        canonical := normalizeDatasetLocation(location)

        // Unrecognized location: echo what the caller sent, falling back
        // to "US" when nothing was sent.
        if canonical == "" {
                if trimmed == "" {
                        trimmed = "US"
                }
                return invalidRegion(trimmed)
        }

        // Recognized location: it must also agree with the API region
        // header, when one is present on the request.
        if locationMatchesAPIRegion(canonical, emulatorAPIRegion(r)) {
                return nil
        }
        if trimmed == "" {
                return invalidRegion(canonical)
        }
        return invalidRegion(trimmed)
}

package handlers

import (
        "net/http"
        "strconv"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// datasetKind is the value the BigQuery REST API returns for the
// `kind` field of a Dataset resource. See
// docs/bigquery/docs/reference/rest/v2/datasets/get.md.
const datasetKind = "bigquery#dataset"

// datasetListKind is the `kind` field for a DatasetList response. See
// docs/bigquery/docs/reference/rest/v2/datasets/list.md.
const datasetListKind = "bigquery#datasetList"

// datasetIDFromPath returns the {projectId}/{datasetId} pair captured
// by the route pattern. It strips any trailing AIP-136 custom-method
// suffix (e.g. ":undelete") from the datasetId so the same helper can
// be reused by DatasetCustomMethodPOST.
func datasetIDFromPath(r *http.Request) (projectID, datasetID string) {
        // Only the first value from splitColonOp is kept; the second
        // (the custom-method op) is deliberately discarded here.
        id, _ := splitColonOp(r.PathValue("datasetId"))
        return r.PathValue("projectId"), id
}

// nowMillis is the BigQuery REST representation of a timestamp: a
// decimal string of milliseconds since epoch.
func nowMillis() string {
        // Current wall-clock time as base-10 milliseconds since the Unix epoch.
        ms := time.Now().UnixMilli()
        return strconv.FormatInt(ms, 10)
}

// datasetResource builds a Dataset resource for a successful response.
// Stamps Kind, ID, and timestamps; preserves any caller-provided
// metadata (FriendlyName, Description, Location) that the engine does
// not need to know about.
//
// Access is materialized to an empty slice when the caller did not
// provide one. The Java BigQuery client wraps `dataset.getAcl()` in
// `new ArrayList<>(...)`, which NPEs on a null value; live BigQuery
// returns `access: []` for newly-created datasets and ACL-mutation
// flows like AuthorizeDatasetIT depend on that shape.
//
// Labels is materialized to an empty map for the same reason: upstream
// samples call `Object.entries(dataset.metadata.labels)` /
// `dict(dataset.labels)` on the deserialized response, which raises
// `TypeError: Cannot convert undefined or null to object` /
// `TypeError: argument of type 'NoneType' is not iterable` on a nil
// value. The bqtypes.Dataset.Labels tag omits `omitempty` so the empty
// map round-trips as `"labels":{}` on the wire.
func datasetResource(projectID, datasetID string, ds bqtypes.Dataset) bqtypes.Dataset {
        // Fill in defaults for anything the caller left unset. The
        // modification time falls back to the (possibly just-stamped)
        // creation time, so the order of these two checks matters.
        if ds.CreationTime == "" {
                ds.CreationTime = nowMillis()
        }
        if ds.LastModifiedTime == "" {
                ds.LastModifiedTime = ds.CreationTime
        }
        if ds.Location == "" {
                ds.Location = "US"
        }
        // Materialize empty (non-nil) containers in place of nil ones.
        if ds.Access == nil {
                ds.Access = []map[string]any{}
        }
        if ds.Labels == nil {
                ds.Labels = bqtypes.ResourceLabels{}
        }

        // Identity fields are always overwritten from the path parameters.
        ds.Kind = datasetKind
        ds.ID = projectID + ":" + datasetID
        ds.DatasetReference = bqtypes.DatasetReference{
                ProjectID: projectID,
                DatasetID: datasetID,
        }
        return ds
}

// DatasetList implements `bigquery.datasets.list`:
//
//        GET /bigquery/v2/projects/{projectId}/datasets
//
// Calls the Catalog.ListDatasets RPC and folds the (deterministically
// ordered, ascending dataset_id) result into a BigQuery datasetList
// envelope. The shape matches
// docs/bigquery/docs/reference/rest/v2/datasets/list.md.
//
// Each returned entry is the minimal dataset-list shape upstream
// emits: kind, id (projectId:datasetId), datasetReference, and an
// empty labels object so client samples that call
// `Object.entries(item.metadata.labels)` on each iteration item do
// not raise (mirrors TestDatasetGetLabelsIsEmptyObjectNotNull).
//
// Pagination: no `nextPageToken` today. The emulator is single-host
// and the catalog never exceeds a handful of datasets in practice;
// the engine helper returns every entry in one shot. When that
// changes (large-catalog stress lane / horizontal sharding) the
// gateway can grow a token by re-keying on dataset_id and slicing
// the response here.
func DatasetList(deps Dependencies) http.HandlerFunc {
        // writeList wraps a dataset slice in the datasetList envelope and
        // sends it with a 200 status.
        writeList := func(w http.ResponseWriter, datasets any) {
                writeJSON(w, http.StatusOK, map[string]any{
                        resourceKeyKind:     datasetListKind,
                        resourceKeyDatasets: datasets,
                })
        }
        return func(w http.ResponseWriter, r *http.Request) {
                // No catalog wired: answer with an empty list.
                if deps.Catalog == nil {
                        writeList(w, []bqtypes.Dataset{})
                        return
                }
                listing, err := deps.Catalog.ListDatasets(r.Context(), &enginepb.ListDatasetsRequest{
                        ProjectId: r.PathValue("projectId"),
                })
                if grpcToHTTPError(w, err) {
                        return
                }
                refs := listing.GetDatasets()
                entries := make([]map[string]any, 0, len(refs))
                for _, ref := range refs {
                        pid, did := ref.GetProjectId(), ref.GetDatasetId()
                        // Labels default to an empty object; stored labels
                        // from the metadata overlay replace it when present.
                        labels := bqtypes.ResourceLabels{}
                        if stored, found := deps.Metadata.GetDataset(pid, did); found && stored.Labels != nil {
                                labels = stored.Labels
                        }
                        entries = append(entries, map[string]any{
                                "kind": datasetKind,
                                "id":   pid + ":" + did,
                                "datasetReference": bqtypes.DatasetReference{
                                        ProjectID: pid,
                                        DatasetID: did,
                                },
                                "labels": labels,
                        })
                }
                writeList(w, entries)
        }
}

// DatasetInsert implements `bigquery.datasets.insert`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets
//
// Decodes the Dataset body, calls Catalog.RegisterDataset on the
// engine, and returns the newly-created Dataset resource on success.
// The dataset's `datasetReference.datasetId` is required; the projectId
// is taken from the URL because the upstream API treats the path's
// projectId as authoritative when both are set.
func DatasetInsert(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID := r.PathValue("projectId")
                body, decoded := decodeDatasetBody(w, r)
                if !decoded {
                        return
                }

                // Request validation runs before any engine call.
                datasetID := body.DatasetReference.DatasetID
                if datasetID == "" {
                        writeError(w, http.StatusBadRequest, "invalid",
                                "datasetReference.datasetId is required")
                        return
                }
                if locErr := validateDatasetLocation(r, body.Location); locErr != nil {
                        writeError(w, http.StatusBadRequest, "invalid", locErr.Error())
                        return
                }
                if rejectUnsupportedDatasetPosture(w, &body) {
                        return
                }
                if deps.Catalog == nil {
                        NotImplemented(w, r)
                        return
                }

                // Register with the engine, then record the REST-side
                // metadata only once registration has succeeded.
                req := &enginepb.RegisterDatasetRequest{
                        Dataset: &enginepb.DatasetRef{
                                ProjectId: projectID,
                                DatasetId: datasetID,
                        },
                        Location: body.Location,
                }
                if _, err := deps.Catalog.RegisterDataset(r.Context(), req); grpcToHTTPError(w, err) {
                        return
                }
                deps.Metadata.PutDataset(projectID, datasetID, body)
                writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, body))
        }
}

// DatasetGet implements `bigquery.datasets.get`:
//
//        GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// The Catalog gRPC service does not yet expose a Get RPC (only
// Register/Drop/List), so existence is checked via ListDatasets before
// synthesizing the Dataset resource from path parameters plus any
// MetadataStore overlay.
//
// REST-only metadata (labels, defaultCollation, friendlyName, ...) is
// surfaced from the in-memory MetadataStore so a prior
// Insert/Patch/Update round-trips through GET — required by the node
// `getDatasetLabels` sample's `Object.entries(dataset.metadata.labels)`
// loop.
func DatasetGet(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID := datasetIDFromPath(r)
                found, err := catalogDatasetExists(r.Context(), deps, projectID, datasetID)
                if err != nil && grpcToHTTPError(w, err) {
                        return
                }
                if !found {
                        writeDatasetNotFound(w, projectID, datasetID)
                        return
                }
                // Start from a zero Dataset and layer any stored REST-only
                // metadata on top before stamping the resource fields.
                var resource bqtypes.Dataset
                if stored, ok := deps.Metadata.GetDataset(projectID, datasetID); ok {
                        resource = applyDatasetMetadataOverlay(resource, stored)
                }
                writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, resource))
        }
}

// DatasetUpdate implements `bigquery.datasets.update`:
//
//        PUT /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// Full replacement of the Dataset metadata. The engine catalog does
// not yet have an update RPC, so the handler echoes the request body
// back as the canonical resource (stamping kind/id/timestamps) and
// records the REST-only metadata fields in the in-memory store so a
// subsequent GET returns the updated values.
func DatasetUpdate(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID := datasetIDFromPath(r)

                // The dataset must already exist; otherwise answer 404
                // before the request body is even decoded.
                found, err := catalogDatasetExists(r.Context(), deps, projectID, datasetID)
                if err != nil && grpcToHTTPError(w, err) {
                        return
                }
                if !found {
                        writeDatasetNotFound(w, projectID, datasetID)
                        return
                }

                replacement, decoded := decodeDatasetBody(w, r)
                if !decoded {
                        return
                }
                if rejectUnsupportedDatasetPosture(w, &replacement) {
                        return
                }

                // Full replacement: the decoded body overwrites whatever
                // metadata was stored for this dataset.
                deps.Metadata.PutDataset(projectID, datasetID, replacement)
                writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, replacement))
        }
}

// DatasetPatch implements `bigquery.datasets.patch`:
//
//        PATCH /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// Sparse update; mirrors DatasetUpdate's metadata-stash posture so
// upstream `setMetadata` + `getMetadata` sequences roundtrip the
// REST-only fields.
func DatasetPatch(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID := datasetIDFromPath(r)

                // Patching a dataset the catalog does not know is a 404.
                found, err := catalogDatasetExists(r.Context(), deps, projectID, datasetID)
                if err != nil && grpcToHTTPError(w, err) {
                        return
                }
                if !found {
                        writeDatasetNotFound(w, projectID, datasetID)
                        return
                }

                patch, decoded := decodeDatasetBody(w, r)
                if !decoded {
                        return
                }
                if rejectUnsupportedDatasetPosture(w, &patch) {
                        return
                }

                // Merge the sparse body into the stored metadata, then read
                // the merged record back and overlay it on the response.
                deps.Metadata.MergeDataset(projectID, datasetID, patch)
                if stored, have := deps.Metadata.GetDataset(projectID, datasetID); have {
                        patch = applyDatasetMetadataOverlay(patch, stored)
                }
                // A patch that addressed labels and left none behind asks
                // the encoder to omit the empty labels object.
                if patch.LabelsPatchPresent() && len(patch.Labels) == 0 {
                        patch.SetOmitEmptyLabelsOnWire(true)
                }
                writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, patch))
        }
}

// DatasetDelete implements `bigquery.datasets.delete`:
//
//        DELETE /bigquery/v2/projects/{projectId}/datasets/{datasetId}
//
// Honors the documented `deleteContents` query parameter by forwarding
// it as DropDatasetRequest.delete_contents; without it the engine
// refuses to drop a non-empty dataset (FailedPrecondition → 400).
func DatasetDelete(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID := datasetIDFromPath(r)
                if deps.Catalog == nil {
                        NotImplemented(w, r)
                        return
                }

                // Only the exact queryParamTrue value enables cascading.
                cascade := r.URL.Query().Get("deleteContents") == queryParamTrue
                req := &enginepb.DropDatasetRequest{
                        Dataset: &enginepb.DatasetRef{
                                ProjectId: projectID,
                                DatasetId: datasetID,
                        },
                        DeleteContents: cascade,
                        // The REST-side metadata travels with the drop request.
                        RestMetadataJson: deps.Metadata.RestMetadataJSON(projectID, datasetID),
                }
                if _, err := deps.Catalog.DropDataset(r.Context(), req); grpcToHTTPError(w, err) {
                        return
                }

                // Engine drop succeeded: clear the gateway-side metadata,
                // including table metadata when contents were deleted.
                deps.Metadata.DeleteDataset(projectID, datasetID)
                if cascade {
                        deps.Metadata.DeleteTablesInDataset(projectID, datasetID)
                }
                writeJSON(w, http.StatusOK, struct{}{})
        }
}

// DatasetUndelete implements `bigquery.datasets.undelete`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}:undelete
//
// Reached via DatasetCustomMethodPOST after parsing the trailing :op.
func DatasetUndelete(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID := datasetIDFromPath(r)
                if deps.Catalog == nil {
                        NotImplemented(w, r)
                        return
                }

                req := &enginepb.UndeleteDatasetRequest{
                        Dataset: &enginepb.DatasetRef{
                                ProjectId: projectID,
                                DatasetId: datasetID,
                        },
                }
                restored, err := deps.Catalog.UndeleteDataset(r.Context(), req)
                if grpcToHTTPError(w, err) {
                        return
                }

                // When the engine hands back a metadata blob, load it into
                // the metadata store before reading the dataset back.
                if restored != nil && restored.GetRestMetadataJson() != "" {
                        deps.Metadata.RestoreDatasetRestMetadataJSON(
                                projectID, datasetID, restored.GetRestMetadataJson())
                }

                // Fall back to a bare US dataset when nothing is stored.
                resource, have := deps.Metadata.GetDataset(projectID, datasetID)
                if !have {
                        resource = bqtypes.Dataset{Location: "US"}
                }
                writeJSON(w, http.StatusOK, datasetResource(projectID, datasetID, resource))
        }
}

// DatasetCustomMethodPOST dispatches the AIP-136 custom-method POST
// endpoints registered against `/datasets/{datasetId}` (which Go's mux
// can't match as `:op` directly). Today the only such method is
// `datasets.undelete`; future BigQuery additions can be added here.
func DatasetCustomMethodPOST(deps Dependencies) http.HandlerFunc {
        // Build the delegate once, not per request.
        undelete := DatasetUndelete(deps)
        return func(w http.ResponseWriter, r *http.Request) {
                _, op := splitColonOp(r.PathValue("datasetId"))

                // Known custom method.
                if op == "undelete" {
                        undelete(w, r)
                        return
                }
                // Plain POST on the resource with no :op suffix.
                if op == "" {
                        writeError(w, http.StatusMethodNotAllowed, "invalid",
                                "POST is not allowed on a dataset resource. "+
                                        "Use POST /datasets to create, or a documented :op "+
                                        "custom method (e.g. :undelete).")
                        return
                }
                // Any other :op is unknown.
                writeError(w, http.StatusNotFound, "notFound",
                        "Unknown dataset custom method ':"+op+"'.")
        }
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datatransfer

import (
        "fmt"
        "net/url"
        "strings"
)

// authTypeOAuth is the BigQuery DataTransfer `authorizationType` value
// for connectors that authenticate via OAuth. Hoisted to a package
// const so the (otherwise repetitive) catalog entries below all
// reference the same source of truth.
const authTypeOAuth = "AUTHORIZATION_TYPE_OAUTH"

// DataSourceCatalogEntry describes a connector surfaced in the
// dataSources list/get responses. AuthorizationURLPlaceholder is
// emitted as JSON `authorizationUrl` when non-empty (an inert .invalid
// host; the emulator does not perform real OAuth or third-party
// traffic).
type DataSourceCatalogEntry struct {
        // DataSourceID identifies the connector; it is the final segment
        // of the `projects/.../locations/.../dataSources/{id}` resource
        // name and the key used when merging extras over built-ins.
        DataSourceID                   string
        // DisplayName is copied verbatim into the resource.
        DisplayName                    string
        // Description is copied verbatim into the resource.
        Description                    string
        // AuthorizationType is copied verbatim into the resource
        // (e.g. authTypeOAuth).
        AuthorizationType              string
        // DefaultDataRefreshIntervalDays is copied verbatim into the resource.
        DefaultDataRefreshIntervalDays int32
        // AuthorizationURLPlaceholder is either a literal URL or, when it
        // contains '%', a fmt template expanded with (project, location,
        // dataSourceID) in that argument order. The expanded value is only
        // emitted when it parses as a URL with both scheme and host.
        AuthorizationURLPlaceholder    string
}

func buildAuthorizationPlaceholder(template, project, location, dataSourceID string) string {
        switch {
        case strings.TrimSpace(template) == "":
                // Blank template: no authorization URL at all.
                return ""
        case !strings.Contains(template, "%"):
                // No format verbs: the template is already the final URL.
                return template
        default:
                // Treat the template as a fmt format string; arguments are
                // project (1), location (2), dataSourceID (3).
                return fmt.Sprintf(template, project, location, dataSourceID)
        }
}

// Catalog overview (applies to builtinDataSourceCatalog, declared
// further down in this file): builtinDataSourceCatalog returns the
// connectors the emulator surfaces by default. `scheduled_query` exists
// as the canonical SQL-execution surface (the SQL runner follow-up will
// wire a Runner). `amazon_s3` satisfies CreateAmazonS3TransferIT's
// catalog probe even though no transfer is actually performed — the IT
// only asserts the create returned a name. The remaining entries cover
// the third-party `Create*Transfer.java` driver classes; each is a
// metadata-only stub (no transfer execution; no third-party traffic).
//
// All third-party rows use the same inert .invalid authorization-URL
// placeholder so `GET .../dataSources/{id}` returns a deterministic
// `authorizationUrl` without ever performing OAuth.

// oauthThirdPartyStubs lists the metadata-only third-party
// connectors the emulator advertises in its dataSources catalog.
// Each row maps directly onto a Create*Transfer.java IT driver class
// (Amazon S3 / Google Ad Manager / Google Ads / Campaign Manager /
// Google Play / Amazon Redshift / Teradata / YouTube Channel /
// YouTube Content Owner). The catalog only carries metadata; no
// transfer execution and no third-party traffic happens.
//
// Each row is positional: data source ID, display name, description.
var oauthThirdPartyStubs = []struct {
        ID, Display, Desc string
}{
        {
                dataSourceAmazonS3,
                "Amazon S3 (emulator catalog stub)",
                "Metadata-only stub for third-party connector discovery; transfer execution and credential validation are not implemented.",
        },
        {
                dataSourceAdManager,
                "Google Ad Manager (emulator catalog stub)",
                "Metadata-only stub for the dfp_dt connector used by CreateAdManagerTransfer; transfer execution is not implemented.",
        },
        {
                dataSourceGoogleAds,
                "Google Ads (emulator catalog stub)",
                "Metadata-only stub for the adwords connector used by CreateAdsTransfer; transfer execution is not implemented.",
        },
        {
                dataSourceCampaignManager,
                "Campaign Manager (emulator catalog stub)",
                "Metadata-only stub for the dcm_dt connector used by CreateCampaignmanagerTransfer; transfer execution is not implemented.",
        },
        {
                dataSourcePlay,
                "Google Play (emulator catalog stub)",
                "Metadata-only stub for the play connector used by CreatePlayTransfer; transfer execution is not implemented.",
        },
        {
                dataSourceRedshift,
                "Amazon Redshift (emulator catalog stub)",
                "Metadata-only stub for the redshift connector used by CreateRedshiftTransfer; transfer execution and credential validation are not implemented.",
        },
        {
                dataSourceOnPremises,
                "Teradata / on-premises (emulator catalog stub)",
                "Metadata-only stub for the on_premises connector used by CreateTeradataTransfer; transfer execution and Teradata agent integration are not implemented.",
        },
        {
                dataSourceYoutubeChannel,
                "YouTube Channel (emulator catalog stub)",
                "Metadata-only stub for the youtube_channel connector used by CreateYoutubeChannelTransfer; transfer execution is not implemented.",
        },
        {
                dataSourceYoutubeContentOwner,
                "YouTube Content Owner (emulator catalog stub)",
                "Metadata-only stub for the youtube_content_owner connector used by CreateYoutubeContentOwnerTransfer; transfer execution is not implemented.",
        },
}

// oauthAuthorizationURLPlaceholder is the inert .invalid URL the
// catalog emits as `authorizationUrl` for every OAuth third-party
// stub. Lifted to a package const so the test fixtures and the
// catalog-builder share the same source of truth.
const oauthAuthorizationURLPlaceholder = "https://oauth-emulator.invalid/authorize?response_type=code&client_id=emulator-not-configured&data_source_id=%[3]s&project=%[1]s&location=%[2]s"

func builtinDataSourceCatalog() []DataSourceCatalogEntry {
        // The scheduled-query connector always comes first; it is the
        // only entry that does not go through oauthStubEntry.
        scheduledQuery := DataSourceCatalogEntry{
                DataSourceID:      dataSourceScheduledQuery,
                DisplayName:       "Scheduled Query (emulator)",
                Description:       "Runs BigQuery SQL on demand via startManualRuns or POST .../runs when a ScheduledQueryRunner is wired; no cron or third-party I/O.",
                AuthorizationType: "AUTHORIZATION_TYPE_GOOGLE_PLUS_AUTHORIZATION_CODE",
        }
        catalog := []DataSourceCatalogEntry{scheduledQuery}
        // Third-party stubs follow in oauthThirdPartyStubs table order.
        for i := range oauthThirdPartyStubs {
                row := oauthThirdPartyStubs[i]
                catalog = append(catalog, oauthStubEntry(row.ID, row.Display, row.Desc))
        }
        return catalog
}

// oauthStubEntry builds a metadata-only OAuth third-party catalog
// entry. The OAuth-related fields (authorization type, daily refresh,
// inert .invalid authorization URL placeholder) are the same for
// every third-party stub, so the per-row table only carries the
// fields that actually differ.
func oauthStubEntry(id, display, desc string) DataSourceCatalogEntry {
        // Start from the values every OAuth stub shares, then fill in the
        // per-connector fields supplied by the caller.
        entry := DataSourceCatalogEntry{
                AuthorizationType:              authTypeOAuth,
                DefaultDataRefreshIntervalDays: 1,
                AuthorizationURLPlaceholder:    oauthAuthorizationURLPlaceholder,
        }
        entry.DataSourceID = id
        entry.DisplayName = display
        entry.Description = desc
        return entry
}

func (h *Handler) mergedCatalogEntries() []DataSourceCatalogEntry {
        builtin := builtinDataSourceCatalog()
        // Nil handler or no extras: the built-in catalog is returned as-is.
        if h == nil || len(h.DataSourceCatalogExtras) == 0 {
                return builtin
        }
        extras := h.DataSourceCatalogExtras

        // normalized trims the entry's ID and reports whether it is usable.
        normalized := func(e DataSourceCatalogEntry) (DataSourceCatalogEntry, bool) {
                e.DataSourceID = strings.TrimSpace(e.DataSourceID)
                return e, e.DataSourceID != ""
        }

        entries := make(map[string]DataSourceCatalogEntry)
        ids := make([]string, 0, len(builtin)+len(extras))

        // Built-ins seed both the lookup table and the output order.
        for _, raw := range builtin {
                entry, usable := normalized(raw)
                if !usable {
                        continue
                }
                entries[entry.DataSourceID] = entry
                ids = append(ids, entry.DataSourceID)
        }
        // Extras override an existing entry in place (keeping its
        // position) or are appended when their ID is new.
        for _, raw := range extras {
                entry, usable := normalized(raw)
                if !usable {
                        continue
                }
                if _, known := entries[entry.DataSourceID]; !known {
                        ids = append(ids, entry.DataSourceID)
                }
                entries[entry.DataSourceID] = entry
        }

        merged := make([]DataSourceCatalogEntry, 0, len(ids))
        for _, key := range ids {
                merged = append(merged, entries[key])
        }
        return merged
}

func (h *Handler) catalogEntryByID(id string) (DataSourceCatalogEntry, bool) {
        // Lookups are by trimmed ID; a blank ID never matches.
        wanted := strings.TrimSpace(id)
        if wanted == "" {
                return DataSourceCatalogEntry{}, false
        }
        catalog := h.mergedCatalogEntries()
        for i := range catalog {
                if catalog[i].DataSourceID == wanted {
                        return catalog[i], true
                }
        }
        return DataSourceCatalogEntry{}, false
}

// dataSourceResource converts a catalog entry into the wire resource for the
// given project and location. The resource name has the form
// projects/{project}/locations/{location}/dataSources/{dataSourceId}.
//
// AuthorizationURL is populated only when buildAuthorizationPlaceholder
// returns a non-empty string that parses as a URL carrying both a scheme and
// a host; anything else is left out of the response.
func (h *Handler) dataSourceResource(project, location string, e DataSourceCatalogEntry) dataSourceResource {
        res := dataSourceResource{
                Name:                           fmt.Sprintf("projects/%s/locations/%s/dataSources/%s", project, location, e.DataSourceID),
                DataSourceID:                   e.DataSourceID,
                DisplayName:                    e.DisplayName,
                Description:                    e.Description,
                AuthorizationType:              e.AuthorizationType,
                DefaultDataRefreshIntervalDays: e.DefaultDataRefreshIntervalDays,
        }
        authURL := buildAuthorizationPlaceholder(e.AuthorizationURLPlaceholder, project, location, e.DataSourceID)
        if authURL == "" {
                return res
        }
        // Guard: never emit a placeholder that is not a well-formed absolute URL.
        parsed, err := url.Parse(authURL)
        if err != nil || parsed.Scheme == "" || parsed.Host == "" {
                return res
        }
        res.AuthorizationURL = authURL
        return res
}

// Package datatransfer implements a minimal BigQuery Data Transfer Service
// REST shell on the emulator's HTTP mux: dataSources catalog, transferConfigs
// CRUD (in-memory), transferRuns CRUD, and the AIP-136 custom methods
// (`scheduleRuns`, `checkValidCreds`, `startManualRuns`).
//
// Shallow-emulator port per docs/ENGINE_POLICY.md.
// The apiregion.CheckHTTP location-mismatch gate is intentionally dropped
// here — this repo's REST surface does not yet surface regional endpoints
// and the emulator's docker-compose listener is always loopback. Logging
// goes through `log/slog.New(slog.DiscardHandler)` when the caller does
// not provide one.
//
// The package is wired by `gateway/server.go` via `(*Handler).Register(mux)`.
// Routes registered:
//
//        GET    /v1/projects/{projectId}/locations/{location}/dataSources
//        GET    /v1/projects/{projectId}/locations/{location}/dataSources/{dataSourceId}
//        GET    /v1/projects/{projectId}/transferConfigs
//        POST   /v1/projects/{projectId}/transferConfigs
//        GET    /v1/projects/{projectId}/locations/{location}/transferConfigs
//        POST   /v1/projects/{projectId}/locations/{location}/transferConfigs
//        GET    /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}
//        PATCH  /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}
//        DELETE /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}
//        POST   /v1/projects/{projectId}/locations/{location}/transferConfigs/{configSeg}
//               (AIP-136 custom methods: :scheduleRuns, :checkValidCreds, :startManualRuns)
//        GET    /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}/runs
//        POST   /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}/runs
//        GET    /v1/projects/{projectId}/locations/{location}/transferConfigs/{configId}/runs/{runId}
package datatransfer

import (
        "crypto/rand"
        "encoding/hex"
        "encoding/json"
        "fmt"
        "io"
        "log/slog"
        "net/http"
        "slices"
        "sort"
        "strconv"
        "strings"
        "sync"
        "sync/atomic"
        "time"
)

// transferStateSucceeded is the JSON state string for a completed
// transfer config or run. It is also the state assigned to every newly
// created config and to every new run before any SQL is executed.
const transferStateSucceeded = "SUCCEEDED"

// transferStateFailed is the JSON state string for a failed transfer
// run. A run is moved to this state when the scheduled-query runner
// reports an error.
const transferStateFailed = "FAILED"

// dataSourceScheduledQuery is the dataSourceId for the scheduled SQL
// connector. The emulator only executes this surface when a Runner is
// wired (the SQL runner follow-up). It is the only data source for
// which run creation is accepted.
const dataSourceScheduledQuery = "scheduled_query"

// dataSourceAmazonS3 is a metadata-only stub for third-party
// connector discovery (the failing-IT baseline row 13:
// CreateAmazonS3TransferIT).
const dataSourceAmazonS3 = "amazon_s3"

// The following dataSourceId constants are the connector identifiers
// the upstream `Create*Transfer.java` driver classes send on
// CreateTransferConfig. They are metadata-only stubs; the emulator
// does not perform any third-party traffic. The connector IDs come
// directly from the snippet drivers (see e.g. CreateAdManagerTransfer
// → `dfp_dt`, CreateAdsTransfer → `adwords`, CreateTeradataTransfer →
// `on_premises`). The earlier shallow-emulator design listed three
// IDs that diverge from what the drivers send (`admanager_transfer`,
// `google_ads`, `teradata`); registering the driver-side IDs is what
// actually moves CreateTransferConfig forward, so we follow the
// drivers here.
const (
        dataSourceAdManager           = "dfp_dt" // sent by CreateAdManagerTransfer
        dataSourceGoogleAds           = "adwords" // sent by CreateAdsTransfer
        dataSourceCampaignManager     = "dcm_dt"
        dataSourcePlay                = "play"
        dataSourceRedshift            = "redshift"
        dataSourceOnPremises          = "on_premises" // sent by CreateTeradataTransfer
        dataSourceYoutubeChannel      = "youtube_channel"
        dataSourceYoutubeContentOwner = "youtube_content_owner"
)

// transferRunErrorMessageKey is the JSON key under which a run error's
// human-readable text is stored.
const transferRunErrorMessageKey = "message"

// transferRunErrorPayload wraps msg in the single-key object shape used for
// the elements of a transfer run's errors list.
func transferRunErrorPayload(msg string) map[string]any {
        payload := make(map[string]any, 1)
        payload[transferRunErrorMessageKey] = msg
        return payload
}

// ScheduledQueryRunner executes scheduled_query transfer SQL against
// the emulator catalog. The shallow-emulator port keeps this as a
// hook the gateway can fill in once `gateway/handlers/queries.go` is
// reachable from the gRPC-free unit-test path; left nil for now (no
// SQL execution).
type ScheduledQueryRunner interface {
        // RunScheduledQueryTransfer runs sql for the given project and
        // location. defaultDatasetID is the config's destination dataset
        // and may be empty. A non-nil error marks the transfer run as
        // FAILED; it is not turned into an HTTP error.
        RunScheduledQueryTransfer(project, location, sql, defaultDatasetID string) error
}

// Handler stores transfer config and run metadata in memory.
// Construct it with NewHandler so the internal maps are initialised.
type Handler struct {
        // Log receives API-error and response-encoding diagnostics. When
        // nil, a logger backed by slog.DiscardHandler is used instead.
        Log *slog.Logger
        // Runner optional; when set, scheduled_query manual runs and run
        // inserts execute SQL locally.
        Runner ScheduledQueryRunner
        // DataSourceCatalogExtras are merged into the built-in dataSources
        // catalog (same dataSourceId: extras win).
        DataSourceCatalogExtras []DataSourceCatalogEntry

        // mu guards configs and runs. nextRunID is the counter behind
        // allocRunID. configs and runs are keyed by full resource name
        // (see configName and runName).
        mu        sync.Mutex
        nextRunID atomic.Uint64
        configs   map[string]*transferConfigResource
        runs      map[string]*transferRunResource
}

// NewHandler builds a transfer service shell with no configs and no runs.
// log may be nil, in which case diagnostics are discarded.
func NewHandler(log *slog.Logger) *Handler {
        h := new(Handler)
        h.Log = log
        h.configs = map[string]*transferConfigResource{}
        h.runs = map[string]*transferRunResource{}
        return h
}

// logger returns the configured logger, or a logger that discards every
// record when the handler is nil or has no Log set.
func (h *Handler) logger() *slog.Logger {
        if h == nil || h.Log == nil {
                return slog.New(slog.DiscardHandler)
        }
        return h.Log
}

// Register installs the v1 dataSources, transferConfigs and transferRuns
// routes on mux. Paths follow the upstream BigQuery Data Transfer API, whose
// gapic clients build URLs under `/v1/...`.
func (h *Handler) Register(mux *http.ServeMux) {
        const (
                locBase = "/v1/projects/{projectId}/locations/{location}"
                // Project-scoped (no /locations/) variant: gapic Go REST
                // clients construct parent="projects/{p}" for create/list.
                projConfigs = "/v1/projects/{projectId}/transferConfigs"
                locConfigs  = locBase + "/transferConfigs"
                runs        = locConfigs + "/{configId}/runs"
        )
        routes := []struct {
                method, path string
                handler      func(http.ResponseWriter, *http.Request)
        }{
                {http.MethodGet, locBase + "/dataSources", h.handleListDataSources},
                {http.MethodGet, locBase + "/dataSources/{dataSourceId}", h.handleGetDataSource},

                {http.MethodGet, projConfigs, h.handleListConfigsProjectScoped},
                {http.MethodPost, projConfigs, h.handleCreateConfigProjectScoped},

                {http.MethodGet, locConfigs, h.handleListConfigs},
                {http.MethodPost, locConfigs, h.handleCreateConfig},
                {http.MethodGet, locConfigs + "/{configId}", h.handleGetConfig},
                {http.MethodPatch, locConfigs + "/{configId}", h.handlePatchConfig},
                {http.MethodDelete, locConfigs + "/{configId}", h.handleDeleteConfig},
                {http.MethodPost, locConfigs + "/{configSeg}", h.handleConfigPostSegment},

                {http.MethodGet, runs, h.handleListRuns},
                {http.MethodPost, runs, h.handleCreateRun},
                {http.MethodGet, runs + "/{runId}", h.handleGetRun},
        }
        for _, rt := range routes {
                mux.HandleFunc(rt.method+" "+rt.path, rt.handler)
        }
}

// transferConfigResource is the JSON-on-wire shape for a single
// transfer config. Mirrors the proto3 field names the upstream gapic
// clients emit (camelCase). Disabled is *bool (not bool) so the patch
// path can distinguish "not in mask" from "set to false" — that is the
// fix the failing-IT rows 14 (DisableTransferConfigIT) and 15
// (ReEnableTransferConfigIT) exercise.
//
// The same struct is used to decode create/patch request bodies and to
// encode responses.
type transferConfigResource struct {
        // Name is the full resource name; on create the server rebuilds it
        // from the request path and the last path segment of the supplied
        // name (or a generated ID).
        Name                 string         `json:"name,omitempty"`
        DisplayName          string         `json:"displayName,omitempty"`
        DataSourceID         string         `json:"dataSourceId,omitempty"`
        Schedule             string         `json:"schedule,omitempty"`
        Params               map[string]any `json:"params,omitempty"`
        // State is set to SUCCEEDED when the config is created.
        State                string         `json:"state,omitempty"`
        // Disabled is nil when the client never sent the field.
        Disabled             *bool          `json:"disabled,omitempty"`
        // CreateTime is an RFC 3339 (nanosecond) UTC timestamp set on create.
        CreateTime           string         `json:"createTime,omitempty"`
        NextRunTime          string         `json:"nextRunTime,omitempty"`
        // UserID defaults to 1 on create when the request leaves it zero.
        UserID               int64          `json:"userId,omitempty"`
        DatasetRegion        string         `json:"datasetRegion,omitempty"`
        DestinationDatasetID string         `json:"destinationDatasetId,omitempty"`
        DestinationDataset   *struct {
                DatasetReference *struct {
                        ProjectID string `json:"projectId,omitempty"`
                        DatasetID string `json:"datasetId,omitempty"`
                } `json:"datasetReference,omitempty"`
        } `json:"destinationDataset,omitempty"`
        DisableAutoScheduling bool `json:"disableAutoScheduling,omitempty"`
}

// listConfigsResponse is the JSON body returned by the transferConfigs
// list endpoint. TransferConfigs has no omitempty, so an empty page is
// still emitted under the "transferConfigs" key. NextPageToken is the
// decimal offset of the next page and is omitted on the last page.
type listConfigsResponse struct {
        TransferConfigs []transferConfigResource `json:"transferConfigs"`
        NextPageToken   string                   `json:"nextPageToken,omitempty"`
}

// dataSourceResource is the JSON-on-wire shape for one dataSources
// catalog entry, as returned by the dataSources list and get endpoints.
type dataSourceResource struct {
        // Name is projects/{project}/locations/{location}/dataSources/{id}.
        Name                           string `json:"name"`
        DataSourceID                   string `json:"dataSourceId"`
        DisplayName                    string `json:"displayName,omitempty"`
        Description                    string `json:"description,omitempty"`
        AuthorizationType              string `json:"authorizationType,omitempty"`
        DefaultDataRefreshIntervalDays int32  `json:"defaultDataRefreshIntervalDays,omitempty"`
        // AuthorizationURL is only populated when the catalog placeholder
        // expands to a URL with both a scheme and a host.
        AuthorizationURL               string `json:"authorizationUrl,omitempty"`
}

// listDataSourcesResponse is the JSON body returned by the dataSources
// list endpoint. NextPageToken is the decimal offset of the next page
// and is omitted on the last page.
type listDataSourcesResponse struct {
        DataSources   []dataSourceResource `json:"dataSources"`
        NextPageToken string               `json:"nextPageToken,omitempty"`
}

// transferRunResource is the JSON-on-wire shape for a single transfer
// run. New runs copy DataSourceID, Params, DatasetRegion and
// DestinationDataset from their parent config.
type transferRunResource struct {
        // Name is the parent config name followed by /runs/{runId}.
        Name               string         `json:"name"`
        // State is SUCCEEDED or FAILED.
        State              string         `json:"state,omitempty"`
        // Errors holds {"message": ...} objects when the run failed.
        Errors             []any          `json:"errors,omitempty"`
        // ScheduleTime, RunTime and UpdateTime are RFC 3339 (nanosecond)
        // UTC timestamps; a new run sets all three to its creation instant.
        ScheduleTime       string         `json:"scheduleTime,omitempty"`
        RunTime            string         `json:"runTime,omitempty"`
        UpdateTime         string         `json:"updateTime,omitempty"`
        DataSourceID       string         `json:"dataSourceId,omitempty"`
        Params             map[string]any `json:"params,omitempty"`
        DatasetRegion      string         `json:"datasetRegion,omitempty"`
        DestinationDataset *struct {
                DatasetReference *struct {
                        ProjectID string `json:"projectId,omitempty"`
                        DatasetID string `json:"datasetId,omitempty"`
                } `json:"datasetReference,omitempty"`
        } `json:"destinationDataset,omitempty"`
}

// configName returns the full resource name of a transfer config:
// projects/{project}/locations/{location}/transferConfigs/{id}.
// The segments are inserted verbatim, without escaping or validation.
func configName(project, location, id string) string {
        parent := fmt.Sprintf("projects/%s/locations/%s", project, location)
        return fmt.Sprintf("%s/transferConfigs/%s", parent, id)
}

// runName returns the full resource name of a transfer run: the parent
// config's name followed by /runs/{runID}.
func runName(project, location, configID, runID string) string {
        parent := configName(project, location, configID)
        return parent + "/runs/" + runID
}

// writeAPIError logs the failure (when log is non-nil) and responds with a
// Google-style error envelope, mirroring the shape produced by
// `gateway/handlers.writeError`. It lives here so the package does not need
// to import handlers and its engine-client dependencies.
//
// The envelope carries the HTTP status as "code", msg as "message", and the
// reason derived from the status both as "status" and inside the single
// "errors" element (domain "global").
func writeAPIError(log *slog.Logger, w http.ResponseWriter, status int, msg string) {
        if log != nil {
                log.Error("datatransfer api error",
                        slog.Int("status", status),
                        slog.String("message", msg),
                )
        }
        reason := apiErrorReason(status)
        detail := map[string]any{
                "reason":  reason,
                "message": msg,
                "domain":  "global",
        }
        envelope := map[string]any{
                "code":    status,
                "message": msg,
                "status":  reason,
                "errors":  []map[string]any{detail},
        }
        writeJSON(log, w, status, map[string]any{"error": envelope})
}

// apiErrorReason maps an HTTP status code to the reason string used in the
// error envelope. Statuses with a dedicated reason are matched first; any
// other 5xx (and 500 itself) is "internalError", any other status of 400 or
// above is "badRequest", and everything below 400 is "unknown".
func apiErrorReason(status int) string {
        switch {
        case status == http.StatusUnauthorized:
                return "unauthorized"
        case status == http.StatusForbidden:
                return "forbidden"
        case status == http.StatusNotFound:
                return "notFound"
        case status == http.StatusMethodNotAllowed:
                return "methodNotAllowed"
        case status == http.StatusConflict:
                return "alreadyExists"
        case status == http.StatusNotImplemented:
                return "notImplemented"
        case status >= http.StatusInternalServerError:
                return "internalError"
        case status >= http.StatusBadRequest:
                return "badRequest"
        }
        return "unknown"
}

// writeJSON sends v as a JSON response with the given status code and a
// UTF-8 JSON content type. The header is written before encoding, so an
// encoding failure cannot change the status; it is only logged, and only
// when log is non-nil.
func writeJSON(log *slog.Logger, w http.ResponseWriter, status int, v any) {
        w.Header().Set("Content-Type", "application/json; charset=UTF-8")
        w.WriteHeader(status)
        err := json.NewEncoder(w).Encode(v)
        if err == nil || log == nil {
                return
        }
        log.Error("datatransfer: encode json response", slog.String("err", err.Error()))
}

// handleListDataSources serves the dataSources list endpoint: the merged
// catalog rendered for the request's project and location, windowed by the
// pageSize / pageToken query parameters. nextPageToken is the decimal
// offset of the following page and is set only when more entries remain.
func (h *Handler) handleListDataSources(w http.ResponseWriter, r *http.Request) {
        query := r.URL.Query()
        all := h.mergedCatalogResources(r.PathValue("projectId"), r.PathValue("location"))
        from, to := pageWindow(len(all), query.Get("pageSize"), query.Get("pageToken"))
        resp := listDataSourcesResponse{DataSources: all[from:to]}
        if to < len(all) {
                resp.NextPageToken = strconv.Itoa(to)
        }
        writeJSON(h.logger(), w, http.StatusOK, resp)
}

// mergedCatalogResources renders every merged catalog entry as a wire
// resource scoped to project and location, preserving catalog order.
func (h *Handler) mergedCatalogResources(project, location string) []dataSourceResource {
        entries := h.mergedCatalogEntries()
        resources := make([]dataSourceResource, len(entries))
        for i := range entries {
                resources[i] = h.dataSourceResource(project, location, entries[i])
        }
        return resources
}

// handleGetDataSource serves a single dataSources catalog entry, looked up
// by the whitespace-trimmed {dataSourceId} path segment. Unknown IDs get a
// 404 error envelope.
func (h *Handler) handleGetDataSource(w http.ResponseWriter, r *http.Request) {
        id := strings.TrimSpace(r.PathValue("dataSourceId"))
        entry, found := h.catalogEntryByID(id)
        if found {
                res := h.dataSourceResource(r.PathValue("projectId"), r.PathValue("location"), entry)
                writeJSON(h.logger(), w, http.StatusOK, res)
                return
        }
        writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: DataSource "+id)
}

// parseDataSourceIDsFilter extracts the data source ID filter from query
// values. It reads the "dataSourceIds" key and, only when that key has no
// values, falls back to "dataSourceIds[]". Each value is whitespace-trimmed
// and blank values are dropped. The result is never nil.
func parseDataSourceIDsFilter(q map[string][]string) []string {
        raw := q["dataSourceIds"]
        if len(raw) == 0 {
                raw = q["dataSourceIds[]"]
        }
        filter := make([]string, 0, len(raw))
        for i := range raw {
                trimmed := strings.TrimSpace(raw[i])
                if trimmed == "" {
                        continue
                }
                filter = append(filter, trimmed)
        }
        return filter
}

// configMatchesDataSourceFilter reports whether cfg passes the data source
// filter. An empty filter or a nil cfg always matches; otherwise the
// config's whitespace-trimmed DataSourceID must equal one of the filter
// values exactly.
func configMatchesDataSourceFilter(cfg *transferConfigResource, filter []string) bool {
        switch {
        case cfg == nil, len(filter) == 0:
                return true
        }
        want := strings.TrimSpace(cfg.DataSourceID)
        return slices.Contains(filter, want)
}

// handleListConfigs serves the location-scoped transferConfigs list
// endpoint. It returns the configs whose resource name lies under the
// request's project and location, optionally restricted by the
// dataSourceIds query filter, sorted by resource name and windowed by the
// pageSize / pageToken query parameters. nextPageToken is the decimal
// offset of the following page and is set only when more configs remain.
//
// Selection, sorting, paging and copying all happen inside one critical
// section, so the page and its nextPageToken always describe the same
// snapshot of the store even when configs are created or deleted
// concurrently.
func (h *Handler) handleListConfigs(w http.ResponseWriter, r *http.Request) {
        project := r.PathValue("projectId")
        location := r.PathValue("location")
        prefix := fmt.Sprintf("projects/%s/locations/%s/transferConfigs/", project, location)
        query := r.URL.Query()
        dsFilter := parseDataSourceIDsFilter(query)

        h.mu.Lock()
        defer h.mu.Unlock()

        var keys []string
        for k, c := range h.configs {
                if strings.HasPrefix(k, prefix) && configMatchesDataSourceFilter(c, dsFilter) {
                        keys = append(keys, k)
                }
        }
        // Map iteration order is random; sort so offsets are stable across pages.
        sort.Strings(keys)

        start, end := pageWindow(len(keys), query.Get("pageSize"), query.Get("pageToken"))
        pageKeys := keys[start:end]
        out := make([]transferConfigResource, 0, len(pageKeys))
        for _, k := range pageKeys {
                if c, ok := h.configs[k]; ok && c != nil {
                        out = append(out, *c)
                }
        }
        resp := listConfigsResponse{TransferConfigs: out}
        if end < len(keys) {
                resp.NextPageToken = strconv.Itoa(end)
        }
        writeJSON(h.logger(), w, http.StatusOK, resp)
}

// handleCreateConfig serves the location-scoped transferConfigs create
// endpoint. An empty or whitespace-only body is accepted and treated as an
// empty config; an unreadable body or malformed JSON yields a 400. The
// decoded input is normalised and then handed to finishCreateTransferConfig,
// which stores it and writes the response.
func (h *Handler) handleCreateConfig(w http.ResponseWriter, r *http.Request) {
        raw, err := io.ReadAll(r.Body)
        if err != nil {
                writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid body")
                return
        }
        _ = r.Body.Close()

        var in transferConfigResource
        if strings.TrimSpace(string(raw)) != "" {
                if decodeErr := json.Unmarshal(raw, &in); decodeErr != nil {
                        writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid json: "+decodeErr.Error())
                        return
                }
        }

        project := r.PathValue("projectId")
        normalizeTransferConfigInput(project, &in)
        h.finishCreateTransferConfig(w, project, r.PathValue("location"), in)
}

// finishCreateTransferConfig stores a new transfer config built from in and
// writes the HTTP response.
//
// The config ID is the last path segment of in.Name when one is present,
// otherwise a generated "tc_<hex>" ID. The stored record gets state
// SUCCEEDED, a fresh createTime, and userId 1 unless the input carries a
// non-zero one. If a config with the resulting name already exists the
// response is a 409 and nothing is stored. For scheduled_query configs an
// initial run is seeded while the store lock is still held.
func (h *Handler) finishCreateTransferConfig(
        w http.ResponseWriter,
        project, location string,
        in transferConfigResource,
) {
        cfgID := pathSuffixOrGen(in.Name, "tc_"+randomHex(16))
        fullName := configName(project, location, cfgID)
        owner := in.UserID
        if owner == 0 {
                owner = 1
        }
        rec := &transferConfigResource{
                Name:                  fullName,
                DisplayName:           in.DisplayName,
                DataSourceID:          in.DataSourceID,
                Schedule:              in.Schedule,
                Params:                in.Params,
                State:                 transferStateSucceeded,
                Disabled:              in.Disabled,
                CreateTime:            time.Now().UTC().Format(time.RFC3339Nano),
                NextRunTime:           strings.TrimSpace(in.NextRunTime),
                UserID:                owner,
                DatasetRegion:         strings.TrimSpace(in.DatasetRegion),
                DestinationDatasetID:  strings.TrimSpace(in.DestinationDatasetID),
                DestinationDataset:    in.DestinationDataset,
                DisableAutoScheduling: in.DisableAutoScheduling,
        }

        h.mu.Lock()
        _, exists := h.configs[fullName]
        if !exists {
                h.configs[fullName] = rec
                h.maybeSeedInitialScheduledQueryRun(project, location, cfgID, rec)
        }
        // Copy under the lock so the response is not affected by later patches.
        out := *rec
        h.mu.Unlock()

        if exists {
                writeAPIError(h.logger(), w, http.StatusConflict, "transfer config already exists")
                return
        }
        writeJSON(h.logger(), w, http.StatusOK, out)
}

// maybeSeedInitialScheduledQueryRun records a first run for a freshly
// created scheduled_query config; configs of any other data source are left
// without runs. The SQL is executed only when a Runner is wired, auto
// scheduling is not disabled and the config has a non-blank schedule;
// invalid query parameters turn the seeded run into a FAILED run carrying
// the reason.
//
// It writes h.runs without locking, so the caller must hold h.mu.
func (h *Handler) maybeSeedInitialScheduledQueryRun(project, location, configID string, cfg *transferConfigResource) {
        if h == nil || cfg == nil || strings.TrimSpace(cfg.DataSourceID) != dataSourceScheduledQuery {
                return
        }
        run := h.newTransferRun(project, location, configID, cfg)
        autoRun := h.Runner != nil && !cfg.DisableAutoScheduling && strings.TrimSpace(cfg.Schedule) != ""
        if autoRun {
                stop, _, reason := h.maybeExecuteScheduledQueryOnRun(project, location, cfg, run)
                if stop {
                        run.State = transferStateFailed
                        run.Errors = []any{transferRunErrorPayload(reason)}
                }
        }
        h.runs[run.Name] = run
}

// pathSuffixOrGen picks the resource ID for a create request. When name
// (whitespace-trimmed) is a slash-separated resource path, the ID is its
// last segment; otherwise fallbackID is returned.
//
// fallbackID is used when name is blank, when it contains no "/" at all,
// and when it ends in "/" — an empty last segment would otherwise produce
// a resource stored under a name with no ID.
func pathSuffixOrGen(name, fallbackID string) string {
        name = strings.TrimSpace(name)
        i := strings.LastIndex(name, "/")
        if i < 0 {
                // Covers both the empty name and a bare, slash-less name.
                return fallbackID
        }
        if suffix := name[i+1:]; suffix != "" {
                return suffix
        }
        return fallbackID
}

// randomHex returns 2*n lowercase hexadecimal characters encoding n bytes
// read from crypto/rand. The error from rand.Read is deliberately ignored;
// the buffer is encoded whatever it holds.
func randomHex(n int) string {
        raw := make([]byte, n)
        _, _ = rand.Read(raw)
        encoded := make([]byte, hex.EncodedLen(len(raw)))
        hex.Encode(encoded, raw)
        return string(encoded)
}

// handleGetConfig serves a single transfer config addressed by the
// project, location and configId path segments. Unknown configs get a 404
// error envelope. The store lock is held until the response is written.
func (h *Handler) handleGetConfig(w http.ResponseWriter, r *http.Request) {
        id := r.PathValue("configId")
        key := configName(r.PathValue("projectId"), r.PathValue("location"), id)

        h.mu.Lock()
        defer h.mu.Unlock()
        if cfg, found := h.configs[key]; found {
                writeJSON(h.logger(), w, http.StatusOK, *cfg)
                return
        }
        writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+id)
}

package datatransfer

import (
        "encoding/json"
        "errors"
        "io"
        "net/http"
        "strings"
)

// handlePatchConfig applies a partial update to a stored transfer config
// and responds with the updated resource.
//
// The request body drives the patch: string fields, Params and
// DestinationDataset are overwritten only when the body carries a non-zero
// value for them. Disabled is a *bool, so an explicit `"disabled": false`
// re-enables a disabled config (failing-IT row 15:
// ReEnableTransferConfigIT) and `"disabled": true` disables it (row 14:
// DisableTransferConfigIT), while an absent field leaves it untouched.
//
// gapic REST clients append an `updateMask` query parameter. The
// shallow-emulator port treats the mask as advisory: this handler does not
// read it and the mask names are not enforced, which matches the existing
// emulator pattern for other PATCH endpoints. One consequence is that a
// field cannot be reset to its zero value through this endpoint.
//
// An unreadable body or malformed JSON (including an empty body) yields a
// 400; an unknown config yields a 404.
func (h *Handler) handlePatchConfig(w http.ResponseWriter, r *http.Request) {
        id := r.PathValue("configId")
        key := configName(r.PathValue("projectId"), r.PathValue("location"), id)

        raw, err := io.ReadAll(r.Body)
        if err != nil {
                writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid body")
                return
        }
        _ = r.Body.Close()
        var patch transferConfigResource
        if decodeErr := json.Unmarshal(raw, &patch); decodeErr != nil {
                writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid json: "+decodeErr.Error())
                return
        }

        // overwrite copies v into *dst only when the patch supplied a value.
        overwrite := func(dst *string, v string) {
                if v != "" {
                        *dst = v
                }
        }

        h.mu.Lock()
        defer h.mu.Unlock()
        cur, found := h.configs[key]
        if !found {
                writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+id)
                return
        }
        overwrite(&cur.DisplayName, patch.DisplayName)
        overwrite(&cur.Schedule, patch.Schedule)
        overwrite(&cur.DatasetRegion, patch.DatasetRegion)
        overwrite(&cur.DestinationDatasetID, patch.DestinationDatasetID)
        overwrite(&cur.NextRunTime, patch.NextRunTime)
        if patch.Params != nil {
                cur.Params = patch.Params
        }
        if patch.DestinationDataset != nil {
                cur.DestinationDataset = patch.DestinationDataset
        }
        if patch.Disabled != nil {
                cur.Disabled = patch.Disabled
        }
        writeJSON(h.logger(), w, http.StatusOK, *cur)
}

// handleDeleteConfig removes a transfer config together with every run
// stored under it. Success is a bare 200 with no body; an unknown config
// yields a 404 error envelope.
func (h *Handler) handleDeleteConfig(w http.ResponseWriter, r *http.Request) {
        id := r.PathValue("configId")
        key := configName(r.PathValue("projectId"), r.PathValue("location"), id)

        h.mu.Lock()
        _, found := h.configs[key]
        if found {
                delete(h.configs, key)
                // Drop the config's runs so they do not outlive their parent.
                runPrefix := key + "/runs/"
                for runKey := range h.runs {
                        if strings.HasPrefix(runKey, runPrefix) {
                                delete(h.runs, runKey)
                        }
                }
        }
        h.mu.Unlock()

        if !found {
                writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+id)
                return
        }
        w.WriteHeader(http.StatusOK)
}

// readOptionalJSONProbe drains the optional JSON body sent with the AIP-136
// custom methods (`:checkValidCreds`, `:startManualRuns`). At most 1 MiB is
// read. An empty or whitespace-only body is fine; anything else must decode
// as a JSON object. A nil result means the caller may proceed; a non-nil
// error carries the message for a 400 response.
func readOptionalJSONProbe(r *http.Request) error {
        limited := io.LimitReader(r.Body, 1<<20)
        raw, readErr := io.ReadAll(limited)
        if readErr != nil {
                return errors.New("invalid body")
        }
        _ = r.Body.Close()
        if strings.TrimSpace(string(raw)) == "" {
                return nil
        }
        // The decoded value is discarded; only well-formedness matters here.
        var discard map[string]any
        if decodeErr := json.Unmarshal(raw, &discard); decodeErr != nil {
                return errors.New("invalid json: " + decodeErr.Error())
        }
        return nil
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datatransfer

import (
        "fmt"
        "net/http"
        "sort"
        "strconv"
        "strings"
        "time"
)

// listRunsResponse is the JSON body returned by the transferRuns list
// endpoint. NextPageToken is the decimal offset of the next page and is
// omitted on the last page.
type listRunsResponse struct {
        TransferRuns  []transferRunResource `json:"transferRuns"`
        NextPageToken string                `json:"nextPageToken,omitempty"`
}

// handleListRuns serves the transferRuns list endpoint for one config. It
// returns the runs stored under the config's resource name, sorted by run
// name and windowed by the pageSize / pageToken query parameters.
// nextPageToken is the decimal offset of the following page and is set only
// when more runs remain. A config without runs (or an unknown config)
// produces an empty list rather than an error.
//
// Selection, sorting, paging and copying all happen inside one critical
// section, so the page and its nextPageToken always describe the same
// snapshot of the store even when runs are created or deleted concurrently.
func (h *Handler) handleListRuns(w http.ResponseWriter, r *http.Request) {
        project := r.PathValue("projectId")
        location := r.PathValue("location")
        configID := r.PathValue("configId")
        prefix := configName(project, location, configID) + "/runs/"
        query := r.URL.Query()

        h.mu.Lock()
        defer h.mu.Unlock()

        var keys []string
        for k := range h.runs {
                if strings.HasPrefix(k, prefix) {
                        keys = append(keys, k)
                }
        }
        // Map iteration order is random; sort so offsets are stable across pages.
        sort.Strings(keys)

        start, end := pageWindow(len(keys), query.Get("pageSize"), query.Get("pageToken"))
        pageKeys := keys[start:end]
        out := make([]transferRunResource, 0, len(pageKeys))
        for _, k := range pageKeys {
                if run, ok := h.runs[k]; ok && run != nil {
                        out = append(out, *run)
                }
        }
        resp := listRunsResponse{TransferRuns: out}
        if end < len(keys) {
                resp.NextPageToken = strconv.Itoa(end)
        }
        writeJSON(h.logger(), w, http.StatusOK, resp)
}

// unsupportedCreateRunDataSource decides whether run creation must be
// refused for a config's data source. A blank data source and
// scheduled_query are accepted ("" and false); any other value yields an
// explanatory message and true.
func unsupportedCreateRunDataSource(ds string) (string, bool) {
        switch trimmed := strings.TrimSpace(ds); trimmed {
        case "", dataSourceScheduledQuery:
                return "", false
        default:
                msg := fmt.Sprintf(
                        "transfer run creation for data source %q is not supported by the emulator (metadata catalog only)",
                        trimmed,
                )
                return msg, true
        }
}

// newTransferRun builds (but does not store) a run for the given config. It
// allocates a fresh run ID, copies the data source, params, region and
// destination dataset from cp, stamps schedule/run/update time with the
// current UTC instant, and starts the run in state SUCCEEDED. cp must be
// non-nil.
func (h *Handler) newTransferRun(project, location, configID string, cp *transferConfigResource) *transferRunResource {
        id := h.allocRunID()
        stamp := time.Now().UTC().Format(time.RFC3339Nano)
        return &transferRunResource{
                Name:               runName(project, location, configID, id),
                State:              transferStateSucceeded,
                ScheduleTime:       stamp,
                RunTime:            stamp,
                UpdateTime:         stamp,
                DataSourceID:       cp.DataSourceID,
                Params:             cp.Params,
                DatasetRegion:      cp.DatasetRegion,
                DestinationDataset: cp.DestinationDataset,
        }
}

// maybeExecuteScheduledQueryOnRun executes the config's SQL through the
// wired Runner and records the outcome on run.
//
// It is a no-op (stop=false) when no Runner is set or the config is not a
// scheduled_query config. stop=true, together with an HTTP status and a
// message, is returned only when the query text cannot be taken from the
// config params. A Runner failure is not reported to the caller: it turns
// the run into a FAILED run carrying the error, while success sets the
// state to SUCCEEDED.
//
// The default dataset passed to the Runner comes from destinationDatasetID
// and falls back to the trimmed DestinationDatasetID field.
func (h *Handler) maybeExecuteScheduledQueryOnRun(
        project, location string,
        cp *transferConfigResource,
        run *transferRunResource,
) (stop bool, status int, msg string) {
        if h.Runner == nil {
                return false, 0, ""
        }
        if strings.TrimSpace(cp.DataSourceID) != dataSourceScheduledQuery {
                return false, 0, ""
        }
        sql, err := scheduledQueryText(cp.Params)
        if err != nil {
                return true, http.StatusBadRequest, err.Error()
        }
        dataset := destinationDatasetID(cp)
        if dataset == "" {
                dataset = strings.TrimSpace(cp.DestinationDatasetID)
        }
        runErr := h.Runner.RunScheduledQueryTransfer(project, location, sql, dataset)
        if runErr == nil {
                run.State = transferStateSucceeded
                return false, 0, ""
        }
        run.State = transferStateFailed
        run.Errors = []any{transferRunErrorPayload(runErr.Error())}
        return false, 0, ""
}

// handleCreateRun serves the transferRuns create endpoint. The parent
// config must exist (404 otherwise) and must be a scheduled_query or
// blank-data-source config (501 otherwise). The run is built from a
// snapshot of the config, the SQL is executed outside the store lock when a
// Runner is wired, and the run is then stored and returned. Invalid query
// parameters are reported with the status chosen by
// maybeExecuteScheduledQueryOnRun and nothing is stored.
func (h *Handler) handleCreateRun(w http.ResponseWriter, r *http.Request) {
        project := r.PathValue("projectId")
        location := r.PathValue("location")
        configID := r.PathValue("configId")
        cfgKey := configName(project, location, configID)

        // Snapshot the config so the lock is not held while SQL runs.
        var snapshot transferConfigResource
        h.mu.Lock()
        stored, found := h.configs[cfgKey]
        if found {
                snapshot = *stored
        }
        h.mu.Unlock()
        if !found {
                writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+configID)
                return
        }

        if reason, unsupported := unsupportedCreateRunDataSource(snapshot.DataSourceID); unsupported {
                writeAPIError(h.logger(), w, http.StatusNotImplemented, reason)
                return
        }

        run := h.newTransferRun(project, location, configID, &snapshot)
        stop, code, reason := h.maybeExecuteScheduledQueryOnRun(project, location, &snapshot, run)
        if stop {
                writeAPIError(h.logger(), w, code, reason)
                return
        }

        h.mu.Lock()
        h.runs[run.Name] = run
        out := *run
        h.mu.Unlock()
        writeJSON(h.logger(), w, http.StatusOK, out)
}

// handleGetRun replies with a copy of the stored transfer run addressed
// by the projectId / location / configId / runId path values, or with a
// 404 API error when no run is recorded under that name.
func (h *Handler) handleGetRun(w http.ResponseWriter, r *http.Request) {
        runID := r.PathValue("runId")
        key := runName(
                r.PathValue("projectId"),
                r.PathValue("location"),
                r.PathValue("configId"),
                runID,
        )

        // The lock stays held until the response has been written.
        h.mu.Lock()
        defer h.mu.Unlock()

        stored, found := h.runs[key]
        if !found {
                writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferRun "+runID)
                return
        }
        snapshot := *stored
        writeJSON(h.logger(), w, http.StatusOK, snapshot)
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datatransfer

import (
        "errors"
        "fmt"
        "net/http"
        "strings"
        "time"
)

// startManualTransferRunsResponse is the JSON body written by
// handleStartManualRuns; the runs it started are serialized under the
// "runs" key.
type startManualTransferRunsResponse struct {
        Runs []transferRunResource `json:"runs"`
}

// allocRunID returns a new run identifier of the form "run_<n>", where
// n is the value obtained by adding one to the handler's nextRunID
// counter.
func (h *Handler) allocRunID() string {
        return fmt.Sprintf("run_%d", h.nextRunID.Add(1))
}

// scheduledQueryText extracts the SQL text stored under params["query"].
//
// It returns the value with surrounding whitespace trimmed. An error is
// returned when params is nil, the key is absent or nil, the value is
// not a string, or the string is blank after trimming.
func scheduledQueryText(params map[string]any) (string, error) {
        // Indexing a nil map yields the zero value, so a nil params map, a
        // missing key, and an explicit nil all arrive at the nil case.
        switch query := params["query"].(type) {
        case nil:
                return "", errors.New("scheduled_query transfer config requires params.query")
        case string:
                if trimmed := strings.TrimSpace(query); trimmed != "" {
                        return trimmed, nil
                }
                return "", errors.New("params.query must be non-empty")
        default:
                return "", errors.New("params.query must be a string")
        }
}

// destinationDatasetID returns the destination dataset id of cfg with
// surrounding whitespace trimmed. A non-blank id in the nested
// DestinationDataset.DatasetReference wins; otherwise the top-level
// DestinationDatasetID is used. A nil cfg yields "".
func destinationDatasetID(cfg *transferConfigResource) string {
        if cfg == nil {
                return ""
        }
        if dest := cfg.DestinationDataset; dest != nil {
                if ref := dest.DatasetReference; ref != nil {
                        if nested := strings.TrimSpace(ref.DatasetID); nested != "" {
                                return nested
                        }
                }
        }
        return strings.TrimSpace(cfg.DestinationDatasetID)
}

// handleConfigPostSegment routes AIP-136 custom-method POST requests of
// the form `{configId}:op`, where op is `scheduleRuns`,
// `checkValidCreds`, or `startManualRuns`. Go's net/http mux can't
// match a literal segment after a wildcard, so the parent `{configSeg}`
// is registered and the trailing `:op` is split off here. A segment
// without both parts, or with an unknown op, gets a 404.
func (h *Handler) handleConfigPostSegment(w http.ResponseWriter, r *http.Request) {
        configID, op, hasOp := strings.Cut(r.PathValue("configSeg"), ":")
        if !hasOp || configID == "" || op == "" {
                writeAPIError(h.logger(), w, http.StatusNotFound, "Not found")
                return
        }

        switch op {
        case "scheduleRuns":
                writeAPIError(h.logger(), w, http.StatusNotImplemented,
                        "scheduleRuns is not supported by the emulator (no backfill or cron execution)")
                return
        case "checkValidCreds", "startManualRuns":
                // Both operations run the same probe over the request body
                // before doing any work.
                if probeErr := readOptionalJSONProbe(r); probeErr != nil {
                        writeAPIError(h.logger(), w, http.StatusBadRequest, probeErr.Error())
                        return
                }
        default:
                writeAPIError(h.logger(), w, http.StatusNotFound, "Not found")
                return
        }

        if op == "checkValidCreds" {
                // No live OAuth or vendor credential checks; clients can probe
                // predictably.
                writeJSON(h.logger(), w, http.StatusOK, map[string]any{"hasValidCreds": false})
                return
        }
        h.handleStartManualRuns(w, r.PathValue("projectId"), r.PathValue("location"), configID)
}

// handleStartManualRuns executes one manual run for the transfer config
// identified by project / location / configID and replies with a
// startManualTransferRunsResponse holding that single run.
//
// Error replies:
//   - 404 when the config is not stored;
//   - 501 when the config's data source is not dataSourceScheduledQuery,
//     or when no Runner is configured;
//   - 400 when the config's params do not carry a usable query.
//
// A failing query is not an HTTP error: the run is stored and returned
// with the failed state and the error recorded in Errors.
func (h *Handler) handleStartManualRuns(w http.ResponseWriter, project, location, configID string) {
        cfgName := configName(project, location, configID)
        h.mu.Lock()
        cfg, ok := h.configs[cfgName]
        if !ok {
                h.mu.Unlock()
                writeAPIError(h.logger(), w, http.StatusNotFound, "Not found: TransferConfig "+configID)
                return
        }
        // Work on a private copy so the lock is not held while the query runs.
        cp := *cfg
        h.mu.Unlock()

        ds := strings.TrimSpace(cp.DataSourceID)
        if ds != dataSourceScheduledQuery {
                writeAPIError(h.logger(), w, http.StatusNotImplemented,
                        fmt.Sprintf("manual runs for data source %q are not supported by the emulator", ds))
                return
        }
        if h.Runner == nil {
                writeAPIError(h.logger(), w, http.StatusNotImplemented,
                        "scheduled_query execution is not configured (emulator metadata-only mode)")
                return
        }
        sql, err := scheduledQueryText(cp.Params)
        if err != nil {
                writeAPIError(h.logger(), w, http.StatusBadRequest, err.Error())
                return
        }
        // destinationDatasetID already falls back to the trimmed top-level
        // DestinationDatasetID for a non-nil config, so no second fallback
        // is needed here.
        defDS := destinationDatasetID(&cp)

        runID := h.allocRunID()
        runFull := runName(project, location, configID, runID)
        // The schedule, run and update timestamps all share one instant.
        now := time.Now().UTC().Format(time.RFC3339Nano)
        run := &transferRunResource{
                Name:          runFull,
                DataSourceID:  cp.DataSourceID,
                Params:        cp.Params,
                DatasetRegion: cp.DatasetRegion,
                ScheduleTime:  now,
                RunTime:       now,
                UpdateTime:    now,
        }
        if cp.DestinationDataset != nil {
                run.DestinationDataset = cp.DestinationDataset
        }
        if err := h.Runner.RunScheduledQueryTransfer(project, location, sql, defDS); err != nil {
                run.State = transferStateFailed
                run.Errors = []any{transferRunErrorPayload(err.Error())}
        } else {
                run.State = transferStateSucceeded
        }
        h.mu.Lock()
        h.runs[runFull] = run
        h.mu.Unlock()
        out := startManualTransferRunsResponse{Runs: []transferRunResource{*run}}
        writeJSON(h.logger(), w, http.StatusOK, out)
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datatransfer

import (
        "encoding/json"
        "io"
        "net/http"
        "sort"
        "strconv"
        "strings"
)

// emulatorDefaultTransferLocation matches the location BigQuery gapic
// REST clients assume when they POST to project-scoped
// `.../projects/{p}/transferConfigs` (no `/locations/` segment in the
// URL). Live BigQuery routes those to the multi-region `us`; the
// emulator stores them at the same key so the per-location LIST can
// still surface them. handleCreateConfigProjectScoped passes this value
// as the location of every config it creates.
const emulatorDefaultTransferLocation = "us"

// handleListConfigsProjectScoped lists the transfer configs of the
// project named by the projectId path value across every location.
//
// Configs are selected by key: the key must start with
// "projects/<project>/locations/" and contain "/transferConfigs/". The
// data-source filter parsed from the query string is then applied, the
// surviving keys are sorted, and the pageSize / pageToken query
// parameters pick the page via pageWindow. NextPageToken is set to the
// end offset whenever more results remain.
//
// Selection, filtering, paging and copying all happen inside one
// critical section so the page and its NextPageToken describe a single
// consistent view of the config map. As before, the lock is held until
// the response has been written.
func (h *Handler) handleListConfigsProjectScoped(w http.ResponseWriter, r *http.Request) {
        project := r.PathValue("projectId")
        locPrefix := "projects/" + project + "/locations/"
        // Parse the query string once and reuse it for filter and paging.
        query := r.URL.Query()
        dsFilter := parseDataSourceIDsFilter(query)

        h.mu.Lock()
        defer h.mu.Unlock()

        matched := make([]string, 0, len(h.configs))
        for k, c := range h.configs {
                if strings.HasPrefix(k, locPrefix) &&
                        strings.Contains(k, "/transferConfigs/") &&
                        configMatchesDataSourceFilter(c, dsFilter) {
                        matched = append(matched, k)
                }
        }
        // Map iteration order is random; sort for stable paging offsets.
        sort.Strings(matched)

        start, end := pageWindow(len(matched), query.Get("pageSize"), query.Get("pageToken"))
        out := make([]transferConfigResource, 0, end-start)
        for _, k := range matched[start:end] {
                out = append(out, *h.configs[k])
        }
        resp := listConfigsResponse{TransferConfigs: out}
        if end < len(matched) {
                resp.NextPageToken = strconv.Itoa(end)
        }
        writeJSON(h.logger(), w, http.StatusOK, resp)
}

// normalizeTransferConfigInput reconciles the two wire forms of the
// destination oneof that gapic REST clients send
// (`destinationDatasetId` and/or nested
// `destinationDataset.datasetReference`) so the in-memory record
// carries both.
//
// A blank top-level id is first filled from the nested reference. If no
// dataset id is known after that, in is left untouched. Otherwise the
// nested reference is created when missing, and its blank DatasetID /
// ProjectID fields are filled with the dataset id and projectID. A nil
// in is a no-op.
func normalizeTransferConfigInput(projectID string, in *transferConfigResource) {
        // Aliases for the anonymous struct types of the DestinationDataset
        // field; an alias denotes the identical type, so values built
        // through it stay assignable to the field.
        type datasetRef = struct {
                ProjectID string `json:"projectId,omitempty"`
                DatasetID string `json:"datasetId,omitempty"`
        }
        type datasetHolder = struct {
                DatasetReference *datasetRef `json:"datasetReference,omitempty"`
        }
        blank := func(s string) bool { return strings.TrimSpace(s) == "" }

        if in == nil {
                return
        }
        if blank(in.DestinationDatasetID) {
                if dest := in.DestinationDataset; dest != nil && dest.DatasetReference != nil {
                        in.DestinationDatasetID = strings.TrimSpace(dest.DatasetReference.DatasetID)
                }
        }
        did := strings.TrimSpace(in.DestinationDatasetID)
        if did == "" {
                return
        }

        // Materialize any missing level of the nested form; freshly created
        // levels have blank fields and are filled in just below.
        if in.DestinationDataset == nil {
                in.DestinationDataset = &datasetHolder{}
        }
        if in.DestinationDataset.DatasetReference == nil {
                in.DestinationDataset.DatasetReference = &datasetRef{}
        }
        ref := in.DestinationDataset.DatasetReference
        if blank(ref.DatasetID) {
                ref.DatasetID = did
        }
        if blank(ref.ProjectID) {
                ref.ProjectID = projectID
        }
}

// handleCreateConfigProjectScoped creates a transfer config from a
// project-scoped request, one with no location in the URL. The body is
// decoded as a transferConfigResource (a blank body leaves it zero),
// normalized, and handed to finishCreateTransferConfig with
// emulatorDefaultTransferLocation as the location. An unreadable body
// or malformed JSON is answered with a 400.
func (h *Handler) handleCreateConfigProjectScoped(w http.ResponseWriter, r *http.Request) {
        raw, readErr := io.ReadAll(r.Body)
        if readErr != nil {
                writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid body")
                return
        }
        // The body has been fully read; a close error carries nothing useful.
        _ = r.Body.Close()

        project := r.PathValue("projectId")
        var cfg transferConfigResource
        if strings.TrimSpace(string(raw)) != "" {
                if decodeErr := json.Unmarshal(raw, &cfg); decodeErr != nil {
                        writeAPIError(h.logger(), w, http.StatusBadRequest, "invalid json: "+decodeErr.Error())
                        return
                }
        }
        normalizeTransferConfigInput(project, &cfg)
        h.finishCreateTransferConfig(w, project, emulatorDefaultTransferLocation, cfg)
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datatransfer

import "strconv"

// pageWindow returns the [start, end) window into a sorted resource
// list of lenNames entries for the given pageSize / pageToken query
// knobs.
//
// pageSizeStr: a missing, non-numeric or non-positive value selects the
// default of 100; a value above the maximum of 1000 is coerced down to
// 1000 rather than falling back to the default.
//
// pageToken is an integer-as-string offset (the same scheme this
// handler emits and re-reads). A missing or invalid token, or one that
// is negative or not below lenNames, resets start to 0.
//
// The result always satisfies 0 <= start <= end <= lenNames; a
// non-positive lenNames yields the empty window (0, 0).
func pageWindow(lenNames int, pageSizeStr, pageToken string) (start, end int) {
        const (
                defaultPageSize = 100
                maxPageSize     = 1000
        )
        if lenNames <= 0 {
                return 0, 0
        }
        pageSize := defaultPageSize
        // Atoi rejects the empty string, so a missing value keeps the default.
        if n, err := strconv.Atoi(pageSizeStr); err == nil && n > 0 {
                pageSize = min(n, maxPageSize)
        }
        if off, err := strconv.Atoi(pageToken); err == nil && off >= 0 && off < lenNames {
                start = off
        }
        end = min(start+pageSize, lenNames)
        return start, end
}

package handlers

import (
        "encoding/json"
        "io"
        "net/http"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// decodeDatasetBody reads the whole request body and decodes it as a
// JSON bqtypes.Dataset. An empty body yields the zero Dataset and true.
// When the body cannot be read or parsed, a 400 "invalid" error is
// written to w and the second result is false.
func decodeDatasetBody(w http.ResponseWriter, r *http.Request) (bqtypes.Dataset, bool) {
        var dataset bqtypes.Dataset
        raw, readErr := io.ReadAll(r.Body)
        if readErr != nil {
                writeError(w, http.StatusBadRequest, "invalid",
                        "Could not read dataset request body: "+readErr.Error())
                return dataset, false
        }
        if len(raw) > 0 {
                if parseErr := json.Unmarshal(raw, &dataset); parseErr != nil {
                        writeError(w, http.StatusBadRequest, "invalid",
                                "Could not parse dataset request body as JSON: "+parseErr.Error())
                        return dataset, false
                }
        }
        return dataset, true
}

// decodeTableBody reads the whole request body and decodes it as a JSON
// bqtypes.Table. An empty body yields the zero Table and true. When the
// body cannot be read or parsed, a 400 "invalid" error is written to w
// and the second result is false.
func decodeTableBody(w http.ResponseWriter, r *http.Request) (bqtypes.Table, bool) {
        var table bqtypes.Table
        raw, readErr := io.ReadAll(r.Body)
        if readErr != nil {
                writeError(w, http.StatusBadRequest, "invalid",
                        "Could not read table request body: "+readErr.Error())
                return table, false
        }
        if len(raw) > 0 {
                if parseErr := json.Unmarshal(raw, &table); parseErr != nil {
                        writeError(w, http.StatusBadRequest, "invalid",
                                "Could not parse table request body as JSON: "+parseErr.Error())
                        return table, false
                }
        }
        return table, true
}

package handlers

import (
        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/external"
        "github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
)

// DepsOptions carries gateway-level settings threaded into handler deps.
type DepsOptions struct {
        // DataDir, when non-empty, is the directory BuildDependenciesWith
        // passes to sourceconfig.Load to obtain the external-source
        // configuration. It is also copied into Dependencies.DataDir.
        DataDir string

        // DefaultDatasetID is the server-level fallback dataset for
        // unqualified table names (see Dependencies.DefaultDatasetID).
        DefaultDatasetID string
}

// BuildDependencies returns the handler dependency bundle shared by the
// REST gateway and the public gRPC surface, using zero-valued
// DepsOptions. With a nil eng (gateway-only mode / unit tests) Catalog
// and Query stay nil and handlers fall back to their NotImplemented
// stubs.
func BuildDependencies(eng *engine.Client) Dependencies {
        var defaults DepsOptions
        return BuildDependenciesWith(eng, defaults)
}

// BuildDependenciesWith assembles the handler dependency bundle from
// eng and opts. Fresh job, metadata, snapshot, routine, model and
// session stores are always created. When opts.DataDir is set, the
// external-source configuration is loaded from it; a load error leaves
// ExternalSources nil. Catalog and Query are taken from eng only when
// eng is non-nil.
func BuildDependenciesWith(eng *engine.Client, opts DepsOptions) Dependencies {
        var external *sourceconfig.Config
        if dir := opts.DataDir; dir != "" {
                // NOTE(review): the load error is dropped without a trace;
                // confirm this best-effort behaviour is intended.
                loaded, err := sourceconfig.Load(dir)
                if err == nil {
                        external = loaded
                }
        }

        bundle := Dependencies{
                Jobs:             jobs.NewRegistry(),
                Metadata:         NewMetadataStore(),
                Snapshots:        NewSnapshotStore(),
                Routines:         NewRoutineStore(),
                Models:           NewModelStore(),
                Sessions:         NewSessionStore(),
                DataDir:          opts.DataDir,
                DefaultDatasetID: opts.DefaultDatasetID,
                ExternalSources:  external,
        }
        if eng == nil {
                return bundle
        }
        bundle.Catalog = eng.Catalog
        bundle.Query = eng.Query
        return bundle
}

// externalResolver builds an external.Resolver from the
// external-source configuration carried by deps.
func externalResolver(deps Dependencies) *external.Resolver {
        sources := deps.ExternalSources
        return external.NewResolver(sources)
}

package handlers

import (
        "net/http"
        "sync"
)

// Discovery returns the handler for the BigQuery v2 discovery endpoint:
//
//        GET /discovery/v1/apis/bigquery/v2/rest
//
// Google API client libraries fetch a discovery document at startup to
// learn the method surface of the service they are talking to. The
// emulator answers with a hand-written, minimal subset of the upstream
// discovery JSON that lists exactly the methods routed in
// [gateway.NewServer] (see docs/REST_API.md). It is deliberately small:
// just enough for a client library to find a `kind`, enumerate the
// method ids, and locate their paths/HTTP verbs.
//
// The shape follows Google's documented `discovery#restDescription`
// format. No parity with the upstream document is claimed for fields
// like `schemas`, `auth`, or `revision`; clients that depend on those
// should hit the real BigQuery discovery URL.
func Discovery(_ Dependencies) http.HandlerFunc {
        // Resolve the document when the handler is constructed, so each
        // request only has to serialize it.
        document := buildDiscoveryDocument()
        serve := func(w http.ResponseWriter, _ *http.Request) {
                writeJSON(w, http.StatusOK, document)
        }
        return serve
}

// buildDiscoveryDocument returns the minimal restDescription served by
// the emulator. It is wrapped in sync.OnceValue, so the literal below
// is constructed on the first call and the same value is returned on
// every later call.
//
// The catalog of methods here is the authoritative list mirrored in
// docs/REST_API.md and gateway/server.go. Keep all three in sync when
// adding a new endpoint: add the mux entry, add the table row, and add
// a method entry here.
var buildDiscoveryDocument = sync.OnceValue(func() discoveryDocument {
        return discoveryDocument{
                Kind:             discoveryKind,
                Etag:             "",
                DiscoveryVersion: "v1",
                ID:               "bigquery:v2",
                Name:             "bigquery",
                Version:          "v2",
                Title:            "BigQuery API (emulator)",
                Description: "Local BigQuery emulator REST surface. " +
                        "This discovery document lists only the methods the emulator " +
                        "actually routes; see docs/REST_API.md for the canonical mapping.",
                Protocol:    "rest",
                RootURL:     "",
                ServicePath: "bigquery/v2/",
                BasePath:    "/bigquery/v2/",
                BaseURL:     "/bigquery/v2/",
                BatchPath:   "batch/bigquery/v2",
                Parameters:  commonParameters(),
                // Resources is keyed by REST resource name; each entry maps
                // method names to their id, path, verb and parameters.
                Resources: map[string]discoveryResource{
                        discoveryResourceProjects: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList: {
                                                ID:         "bigquery.projects.list",
                                                Path:       "projects",
                                                HTTPMethod: http.MethodGet,
                                        },
                                        "getServiceAccount": {
                                                ID:             "bigquery.projects.getServiceAccount",
                                                Path:           "projects/{projectId}/serviceAccount",
                                                HTTPMethod:     http.MethodGet,
                                                ParameterOrder: []string{paramProjectID},
                                                Parameters: map[string]discoveryParameter{
                                                        paramProjectID: pathString(paramProjectID),
                                                },
                                        },
                                },
                        },
                        discoveryResourceDatasets: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList:   datasetsListMethod(),
                                        discoveryMethodInsert: datasetsInsertMethod(),
                                        discoveryMethodGet:    datasetsGetMethod(),
                                        discoveryMethodUpdate: datasetsUpdateMethod(),
                                        discoveryMethodPatch:  datasetsPatchMethod(),
                                        discoveryMethodDelete: datasetsDeleteMethod(),
                                        "undelete":            datasetsUndeleteMethod(),
                                },
                        },
                        discoveryResourceTables: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList:   tablesListMethod(),
                                        discoveryMethodInsert: tablesInsertMethod(),
                                        discoveryMethodGet:    tablesGetMethod(),
                                        discoveryMethodUpdate: tablesUpdateMethod(),
                                        discoveryMethodPatch:  tablesPatchMethod(),
                                        discoveryMethodDelete: tablesDeleteMethod(),
                                        "getIamPolicy":        tablesIamMethod("getIamPolicy"),
                                        "setIamPolicy":        tablesIamMethod("setIamPolicy"),
                                        "testIamPermissions":  tablesIamMethod("testIamPermissions"),
                                },
                        },
                        discoveryResourceTabledata: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList: tabledataListMethod(),
                                        "insertAll":         tabledataInsertAllMethod(),
                                },
                        },
                        discoveryResourceJobs: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList:   jobsListMethod(),
                                        discoveryMethodInsert: jobsInsertMethod(),
                                        discoveryMethodGet:    jobsGetMethod(),
                                        "cancel":              jobsCancelMethod(),
                                        discoveryMethodDelete: jobsDeleteMethod(),
                                        discoveryMethodQuery:  jobsQueryMethod(),
                                        "getQueryResults":     jobsGetQueryResultsMethod(),
                                },
                        },
                        discoveryResourceModels: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList:   modelsListMethod(),
                                        discoveryMethodGet:    modelsGetMethod(),
                                        discoveryMethodPatch:  modelsPatchMethod(),
                                        discoveryMethodDelete: modelsDeleteMethod(),
                                },
                        },
                        discoveryResourceRoutines: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList:   routinesListMethod(),
                                        discoveryMethodInsert: routinesInsertMethod(),
                                        discoveryMethodGet:    routinesGetMethod(),
                                        discoveryMethodUpdate: routinesUpdateMethod(),
                                        discoveryMethodDelete: routinesDeleteMethod(),
                                },
                        },
                        discoveryResourceRowPolicy: {
                                Methods: map[string]discoveryMethod{
                                        discoveryMethodList: rowAccessPoliciesListMethod(),
                                },
                        },
                },
        }
})

// discoveryKind is the `kind` value Google's discovery service stamps
// on every restDescription; buildDiscoveryDocument emits it as the
// document's Kind. The verification command (`jq .kind`) asserts this
// exact string, so it must not drift.
const discoveryKind = "discovery#restDescription"

// Discovery-document path-parameter names. The upstream BigQuery REST
// API exposes resources keyed off these {…} segments and the
// discovery JSON has to spell them out verbatim, which is why the same
// string repeats dozens of times across the method tables. They are
// hoisted into constants so each name is spelled in exactly one place
// and the JSON wire shape keeps a single source of truth.
const (
        paramProjectID = "projectId"
        paramDatasetID = "datasetId"
        paramTableID   = "tableId"
        paramJobID     = "jobId"
        paramModelID   = "modelId"
        paramRoutineID = "routineId"
)

// Discovery-document string constants, in three groups:
//
//   - discoveryResource* are the JSON-object keys inside the document's
//     top-level `resources` map; client libraries dispatch on them to
//     find the method tables for each REST resource.
//   - discoveryMethod* are the method-name keys used inside each
//     resource's `methods` map.
//   - discoveryParam* are the `type` and `location` values used when
//     declaring parameters.
const (
        discoveryResourceProjects   = "projects"
        discoveryResourceDatasets   = "datasets"
        discoveryResourceTables     = "tables"
        discoveryResourceTabledata  = "tabledata"
        discoveryResourceJobs       = "jobs"
        discoveryResourceModels     = "models"
        discoveryResourceRoutines   = "routines"
        discoveryResourceRowPolicy  = "rowAccessPolicies"
        discoveryMethodList         = "list"
        discoveryMethodGet          = "get"
        discoveryMethodInsert       = "insert"
        discoveryMethodUpdate       = "update"
        discoveryMethodPatch        = "patch"
        discoveryMethodDelete       = "delete"
        discoveryMethodQuery        = "query"
        discoveryParamTypeString    = "string"
        discoveryParamLocationPath  = "path"
        discoveryParamLocationQuery = "query"
)

// discoveryDocument is the trimmed-down restDescription served by the
// emulator. It models only the fields client libraries actually consult
// for routing; the upstream document also contains schemas, scopes,
// auth, and feature flags which the emulator does not need.
//
// Etag, Description, BatchPath and Parameters are dropped from the JSON
// when empty (omitempty); every other field is always emitted.
type discoveryDocument struct {
        Kind             string                        `json:"kind"`
        Etag             string                        `json:"etag,omitempty"`
        DiscoveryVersion string                        `json:"discoveryVersion"`
        ID               string                        `json:"id"`
        Name             string                        `json:"name"`
        Version          string                        `json:"version"`
        Title            string                        `json:"title"`
        Description      string                        `json:"description,omitempty"`
        Protocol         string                        `json:"protocol"`
        RootURL          string                        `json:"rootUrl"`
        ServicePath      string                        `json:"servicePath"`
        BasePath         string                        `json:"basePath"`
        BaseURL          string                        `json:"baseUrl"`
        BatchPath        string                        `json:"batchPath,omitempty"`
        Parameters       map[string]discoveryParameter `json:"parameters,omitempty"`
        Resources        map[string]discoveryResource  `json:"resources"`
}

// discoveryResource is one entry of the document's `resources` map: the
// methods of a single REST resource, keyed by method name.
type discoveryResource struct {
        Methods map[string]discoveryMethod `json:"methods"`
}

// discoveryMethod describes one routed method: its dotted id, its path
// template, its HTTP verb, and the parameters it declares. Description,
// ParameterOrder and Parameters are omitted from the JSON when empty.
type discoveryMethod struct {
        ID             string                        `json:"id"`
        Path           string                        `json:"path"`
        HTTPMethod     string                        `json:"httpMethod"`
        Description    string                        `json:"description,omitempty"`
        ParameterOrder []string                      `json:"parameterOrder,omitempty"`
        Parameters     map[string]discoveryParameter `json:"parameters,omitempty"`
}

// discoveryParameter declares a single parameter: its type, where it is
// carried (the `location`, e.g. "path" or "query"), whether it is
// required, and a description. Required and Description are omitted
// from the JSON when unset.
type discoveryParameter struct {
        Type        string `json:"type"`
        Location    string `json:"location"`
        Required    bool   `json:"required,omitempty"`
        Description string `json:"description,omitempty"`
}

// pathString builds the declaration of a required, string-typed path
// parameter whose description is simply its name. It exists to keep the
// method tables compact.
func pathString(name string) discoveryParameter {
        var param discoveryParameter
        param.Type = discoveryParamTypeString
        param.Location = discoveryParamLocationPath
        param.Required = true
        param.Description = name
        return param
}

// commonParameters returns the Google-standard query parameters every
// method accepts. Only the handful BigQuery clients actually pass are
// declared; the full upstream list is much longer. A fresh map is
// returned on each call.
func commonParameters() map[string]discoveryParameter {
        // queryParam declares an optional query-string parameter.
        queryParam := func(typ, description string) discoveryParameter {
                return discoveryParameter{
                        Type:        typ,
                        Location:    discoveryParamLocationQuery,
                        Description: description,
                }
        }
        return map[string]discoveryParameter{
                "alt":          queryParam(discoveryParamTypeString, "Data format for the response."),
                "prettyPrint":  queryParam("boolean", "Returns response with indentations and line breaks."),
                "key":          queryParam(discoveryParamTypeString, "API key. Ignored by the emulator."),
                "access_token": queryParam(discoveryParamTypeString, "OAuth access token. Ignored by the emulator."),
        }
}

package handlers

import "net/http"

// The dataset / table / job method definitions are factored into
// individual helpers so each method's parameter set stays grouped with
// its path and id. They are not exported because they are only used to
// populate buildDiscoveryDocument.

// datasetsListMethod describes bigquery.datasets.list: a GET on
// projects/{projectId}/datasets whose only parameter is the project
// path capture.
func datasetsListMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.datasets.list"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets"
        m.ParameterOrder = []string{paramProjectID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
        }
        return m
}

// datasetsInsertMethod describes bigquery.datasets.insert: a POST on
// projects/{projectId}/datasets whose only parameter is the project
// path capture.
func datasetsInsertMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.datasets.insert"
        m.HTTPMethod = http.MethodPost
        m.Path = "projects/{projectId}/datasets"
        m.ParameterOrder = []string{paramProjectID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
        }
        return m
}

// datasetsGetMethod describes bigquery.datasets.get: a GET on a single
// dataset addressed by the project and dataset path captures. Several
// sibling dataset methods reuse this definition as their base.
func datasetsGetMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.datasets.get"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramDatasetID: pathString(paramDatasetID),
        }
        return m
}

// datasetsUpdateMethod describes bigquery.datasets.update. It keeps the
// path and parameters of datasetsGetMethod and swaps in its own id and
// the PUT verb.
func datasetsUpdateMethod() discoveryMethod {
        method := datasetsGetMethod()
        method.ID, method.HTTPMethod = "bigquery.datasets.update", http.MethodPut
        return method
}

// datasetsPatchMethod describes bigquery.datasets.patch. It keeps the
// path and parameters of datasetsGetMethod and swaps in its own id and
// the PATCH verb.
func datasetsPatchMethod() discoveryMethod {
        method := datasetsGetMethod()
        method.ID, method.HTTPMethod = "bigquery.datasets.patch", http.MethodPatch
        return method
}

// datasetsDeleteMethod describes bigquery.datasets.delete. It keeps the
// path and parameters of datasetsGetMethod and swaps in its own id and
// the DELETE verb.
func datasetsDeleteMethod() discoveryMethod {
        method := datasetsGetMethod()
        method.ID, method.HTTPMethod = "bigquery.datasets.delete", http.MethodDelete
        return method
}

// datasetsUndeleteMethod describes the bigquery.datasets.undelete
// custom method: a POST on the dataset path suffixed with ":undelete",
// addressed by the project and dataset path captures.
func datasetsUndeleteMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.datasets.undelete"
        m.HTTPMethod = http.MethodPost
        m.Path = "projects/{projectId}/datasets/{datasetId}:undelete"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramDatasetID: pathString(paramDatasetID),
        }
        return m
}

// tableScopedParams returns a fresh parameter map holding the three
// path captures (project, dataset, table) used by methods that target
// a specific table.
func tableScopedParams() map[string]discoveryParameter {
        params := make(map[string]discoveryParameter, 3)
        for _, name := range []string{paramProjectID, paramDatasetID, paramTableID} {
                params[name] = pathString(name)
        }
        return params
}

// tablesListMethod describes bigquery.tables.list: a GET on the table
// collection of a dataset, addressed by the project and dataset path
// captures.
func tablesListMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.tables.list"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/tables"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramDatasetID: pathString(paramDatasetID),
        }
        return m
}

// tablesInsertMethod describes bigquery.tables.insert. It keeps the
// collection path and parameters of tablesListMethod and swaps in its
// own id and the POST verb.
func tablesInsertMethod() discoveryMethod {
        method := tablesListMethod()
        method.ID, method.HTTPMethod = "bigquery.tables.insert", http.MethodPost
        return method
}

// tablesGetMethod describes bigquery.tables.get: a GET on a single
// table addressed by the project, dataset and table path captures.
// Several sibling table methods reuse this definition as their base.
func tablesGetMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.tables.get"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
        m.Parameters = tableScopedParams()
        return m
}

// tablesUpdateMethod describes bigquery.tables.update. It keeps the
// path and parameters of tablesGetMethod and swaps in its own id and
// the PUT verb.
func tablesUpdateMethod() discoveryMethod {
        method := tablesGetMethod()
        method.ID, method.HTTPMethod = "bigquery.tables.update", http.MethodPut
        return method
}

// tablesPatchMethod describes bigquery.tables.patch. It keeps the path
// and parameters of tablesGetMethod and swaps in its own id and the
// PATCH verb.
func tablesPatchMethod() discoveryMethod {
        method := tablesGetMethod()
        method.ID, method.HTTPMethod = "bigquery.tables.patch", http.MethodPatch
        return method
}

// tablesDeleteMethod describes bigquery.tables.delete. It keeps the
// path and parameters of tablesGetMethod and swaps in its own id and
// the DELETE verb.
func tablesDeleteMethod() discoveryMethod {
        method := tablesGetMethod()
        method.ID, method.HTTPMethod = "bigquery.tables.delete", http.MethodDelete
        return method
}

// tablesIamMethod describes a table-scoped custom method named op. The
// method id is "bigquery.tables." followed by op, and the path is the
// table path suffixed with ":" and op. The verb is always POST and the
// parameters are the three table-scoped path captures.
func tablesIamMethod(op string) discoveryMethod {
        const (
                idPrefix   = "bigquery.tables."
                pathPrefix = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}:"
        )
        var m discoveryMethod
        m.ID = idPrefix + op
        m.Path = pathPrefix + op
        m.HTTPMethod = http.MethodPost
        m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
        m.Parameters = tableScopedParams()
        return m
}

// tabledataListMethod describes bigquery.tabledata.list: a GET on the
// "data" sub-resource of a table, addressed by the three table-scoped
// path captures.
func tabledataListMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.tabledata.list"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}/data"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
        m.Parameters = tableScopedParams()
        return m
}

// tabledataInsertAllMethod describes bigquery.tabledata.insertAll: a
// POST on the "insertAll" sub-resource of a table, addressed by the
// three table-scoped path captures.
func tabledataInsertAllMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.tabledata.insertAll"
        m.HTTPMethod = http.MethodPost
        m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}/insertAll"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
        m.Parameters = tableScopedParams()
        return m
}

// jobsListMethod describes bigquery.jobs.list: a GET on
// projects/{projectId}/jobs whose only parameter is the project path
// capture.
func jobsListMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.jobs.list"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/jobs"
        m.ParameterOrder = []string{paramProjectID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
        }
        return m
}

// jobsInsertMethod describes bigquery.jobs.insert. It keeps the
// collection path and parameters of jobsListMethod and swaps in its
// own id and the POST verb.
func jobsInsertMethod() discoveryMethod {
        method := jobsListMethod()
        method.ID, method.HTTPMethod = "bigquery.jobs.insert", http.MethodPost
        return method
}

// jobsGetMethod describes bigquery.jobs.get: a GET on a single job
// addressed by the project and job path captures.
func jobsGetMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.jobs.get"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/jobs/{jobId}"
        m.ParameterOrder = []string{paramProjectID, paramJobID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramJobID:     pathString(paramJobID),
        }
        return m
}

// jobsCancelMethod describes bigquery.jobs.cancel: a POST on the
// "cancel" sub-resource of a job, addressed by the project and job
// path captures.
func jobsCancelMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.jobs.cancel"
        m.HTTPMethod = http.MethodPost
        m.Path = "projects/{projectId}/jobs/{jobId}/cancel"
        m.ParameterOrder = []string{paramProjectID, paramJobID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramJobID:     pathString(paramJobID),
        }
        return m
}

// jobsDeleteMethod describes bigquery.jobs.delete: a DELETE on the
// "delete" sub-resource of a job, addressed by the project and job
// path captures.
func jobsDeleteMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.jobs.delete"
        m.HTTPMethod = http.MethodDelete
        m.Path = "projects/{projectId}/jobs/{jobId}/delete"
        m.ParameterOrder = []string{paramProjectID, paramJobID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramJobID:     pathString(paramJobID),
        }
        return m
}

// jobsQueryMethod describes bigquery.jobs.query: a POST on
// projects/{projectId}/queries whose only parameter is the project
// path capture.
func jobsQueryMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.jobs.query"
        m.HTTPMethod = http.MethodPost
        m.Path = "projects/{projectId}/queries"
        m.ParameterOrder = []string{paramProjectID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
        }
        return m
}

// jobsGetQueryResultsMethod describes bigquery.jobs.getQueryResults: a
// GET on projects/{projectId}/queries/{jobId}, addressed by the project
// and job path captures.
func jobsGetQueryResultsMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.jobs.getQueryResults"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/queries/{jobId}"
        m.ParameterOrder = []string{paramProjectID, paramJobID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramJobID:     pathString(paramJobID),
        }
        return m
}

// modelScopedParams returns a fresh parameter map holding the three
// path captures (project, dataset, model) shared by the
// bigquery.models.* methods that address one specific model.
func modelScopedParams() map[string]discoveryParameter {
        params := make(map[string]discoveryParameter, 3)
        for _, name := range []string{paramProjectID, paramDatasetID, paramModelID} {
                params[name] = pathString(name)
        }
        return params
}

// modelsListMethod describes bigquery.models.list: a GET on the model
// collection of a dataset, addressed by the project and dataset path
// captures.
func modelsListMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.models.list"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/models"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramDatasetID: pathString(paramDatasetID),
        }
        return m
}

// modelsGetMethod describes bigquery.models.get: a GET on a single
// model addressed by the project, dataset and model path captures.
// The patch and delete model methods reuse this definition as their
// base.
func modelsGetMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.models.get"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/models/{modelId}"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramModelID}
        m.Parameters = modelScopedParams()
        return m
}

// modelsPatchMethod describes bigquery.models.patch. It keeps the path
// and parameters of modelsGetMethod and swaps in its own id and the
// PATCH verb.
func modelsPatchMethod() discoveryMethod {
        method := modelsGetMethod()
        method.ID, method.HTTPMethod = "bigquery.models.patch", http.MethodPatch
        return method
}

// modelsDeleteMethod describes bigquery.models.delete. It keeps the
// path and parameters of modelsGetMethod and swaps in its own id and
// the DELETE verb.
func modelsDeleteMethod() discoveryMethod {
        method := modelsGetMethod()
        method.ID, method.HTTPMethod = "bigquery.models.delete", http.MethodDelete
        return method
}

// routineScopedParams returns a fresh parameter map holding the three
// path captures (project, dataset, routine) shared by the
// bigquery.routines.* methods that address one specific routine.
func routineScopedParams() map[string]discoveryParameter {
        params := make(map[string]discoveryParameter, 3)
        for _, name := range []string{paramProjectID, paramDatasetID, paramRoutineID} {
                params[name] = pathString(name)
        }
        return params
}

// routinesListMethod describes bigquery.routines.list: a GET on the
// routine collection of a dataset, addressed by the project and
// dataset path captures.
func routinesListMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.routines.list"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/routines"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID}
        m.Parameters = map[string]discoveryParameter{
                paramProjectID: pathString(paramProjectID),
                paramDatasetID: pathString(paramDatasetID),
        }
        return m
}

// routinesInsertMethod describes bigquery.routines.insert. It keeps the
// collection path and parameters of routinesListMethod and swaps in
// its own id and the POST verb.
func routinesInsertMethod() discoveryMethod {
        method := routinesListMethod()
        method.ID, method.HTTPMethod = "bigquery.routines.insert", http.MethodPost
        return method
}

// routinesGetMethod describes bigquery.routines.get: a GET on a single
// routine addressed by the project, dataset and routine path captures.
// The update and delete routine methods reuse this definition as their
// base.
func routinesGetMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.routines.get"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/routines/{routineId}"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramRoutineID}
        m.Parameters = routineScopedParams()
        return m
}

// routinesUpdateMethod describes bigquery.routines.update. It keeps the
// path and parameters of routinesGetMethod and swaps in its own id and
// the PUT verb.
func routinesUpdateMethod() discoveryMethod {
        method := routinesGetMethod()
        method.ID, method.HTTPMethod = "bigquery.routines.update", http.MethodPut
        return method
}

// routinesDeleteMethod describes bigquery.routines.delete. It keeps the
// path and parameters of routinesGetMethod and swaps in its own id and
// the DELETE verb.
func routinesDeleteMethod() discoveryMethod {
        method := routinesGetMethod()
        method.ID, method.HTTPMethod = "bigquery.routines.delete", http.MethodDelete
        return method
}

// rowAccessPoliciesListMethod describes bigquery.rowAccessPolicies.list:
// a GET on the "rowAccessPolicies" sub-resource of a table, addressed
// by the three table-scoped path captures.
func rowAccessPoliciesListMethod() discoveryMethod {
        var m discoveryMethod
        m.ID = "bigquery.rowAccessPolicies.list"
        m.HTTPMethod = http.MethodGet
        m.Path = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}/rowAccessPolicies"
        m.ParameterOrder = []string{paramProjectID, paramDatasetID, paramTableID}
        m.Parameters = tableScopedParams()
        return m
}

package handlers

import (
        "net/http"

        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

// queryGRPCToHTTPError converts an error returned by the engine's Query
// service into a BigQuery-shaped JSON error envelope and writes it to
// w. It reports whether a response was written, so call sites can read
// `if queryGRPCToHTTPError(w, err) { return }`. A nil err, or a status
// whose code is OK, writes nothing and yields false.
//
// The table mirrors grpcToHTTPError but uses the query-specific REST
// reason codes the BigQuery client libraries recognize:
//
//   - INVALID_ARGUMENT → 400 invalidQuery (parse / analysis errors,
//     unknown table or column references, type mismatches; see
//     `frontend/handlers/query.cc::AnalyzeStatusToGrpc` and
//     docs/REST_API.md "SQL dialect" for why analysis errors carry
//     `reason: invalidQuery` rather than the generic `invalid`).
//   - FAILED_PRECONDITION → 400 invalidQuery (raised by the engine when
//     the catalog has not been initialized; folded into the same
//     reason as invalid SQL because the BigQuery REST envelope has no
//     dedicated code for "the emulator is not ready").
//   - NOT_FOUND → 404 notFound (storage-side misses such as
//     `DescribeTable`; analyzer-detected misses usually arrive as
//     INVALID_ARGUMENT instead).
//   - ALREADY_EXISTS → 409 duplicate (DDL/control-op conflicts such as
//     UNDROP SCHEMA after recreating the same dataset id).
//   - PERMISSION_DENIED → 403 accessDenied.
//   - UNAUTHENTICATED → 401 authError.
//   - UNIMPLEMENTED → 501 notImplemented (the gateway is talking to a
//     legacy engine build with `--googlesql=off`).
//   - UNAVAILABLE → 503 backendError.
//   - DEADLINE_EXCEEDED → 504 backendError.
//   - RESOURCE_EXHAUSTED → 429 quotaExceeded.
//
// Every other code, and any error that is not a gRPC status at all, is
// reported as 500 internalError so a misbehaving engine cannot be
// mistaken for a recoverable client-side issue.
func queryGRPCToHTTPError(w http.ResponseWriter, err error) bool {
        if err == nil {
                return false
        }
        grpcStatus, isStatus := status.FromError(err)
        if !isStatus {
                // Plain Go error: surface the raw text under a generic 500.
                writeError(w, http.StatusInternalServerError, reasonInternalError,
                        "Engine RPC failed: "+err.Error())
                return true
        }
        code := grpcStatus.Code()
        if code == codes.OK {
                return false
        }

        type restError struct {
                httpStatus int
                reason     string
        }
        mapped, known := map[codes.Code]restError{
                codes.InvalidArgument:    {http.StatusBadRequest, reasonInvalidQuery},
                codes.FailedPrecondition: {http.StatusBadRequest, reasonInvalidQuery},
                codes.NotFound:           {http.StatusNotFound, reasonNotFound},
                codes.AlreadyExists:      {http.StatusConflict, reasonDuplicate},
                codes.PermissionDenied:   {http.StatusForbidden, reasonAccessDenied},
                codes.Unauthenticated:    {http.StatusUnauthorized, reasonAuthError},
                codes.Unimplemented:      {http.StatusNotImplemented, reasonNotImplemented},
                codes.Unavailable:        {http.StatusServiceUnavailable, reasonBackendError},
                codes.DeadlineExceeded:   {http.StatusGatewayTimeout, reasonBackendError},
                codes.ResourceExhausted:  {http.StatusTooManyRequests, reasonQuotaExceeded},
        }[code]
        if !known {
                // Unmapped codes (INTERNAL, UNKNOWN, ...) fall back to 500.
                mapped = restError{http.StatusInternalServerError, reasonInternalError}
        }
        writeError(w, mapped.httpStatus, mapped.reason, bqStyleMessage(grpcStatus.Message()))
        return true
}

package handlers

import (
        "context"
        "net/http"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/external"
)

// queryDefaultDatasetForExecute runs the two preparation steps that
// precede ExecuteQuery: it resolves the request's defaultDataset and
// then materializes the request's tableDefinitions. On success it
// returns the dataset id to use and ok=true. If preparing the external
// tables fails, an HTTP error is written to w and ("", false) is
// returned.
func queryDefaultDatasetForExecute(
        deps Dependencies,
        w http.ResponseWriter,
        r *http.Request,
        projectID string,
        req *bqtypes.QueryRequest,
) (string, bool) {
        fallback := resolveDefaultDataset(deps, req.DefaultDataset)
        resolved, prepErr := prepareQueryExternalTables(
                r.Context(), deps, projectID, req.TableDefinitions, fallback)
        if !writeExternalTableError(w, prepErr) {
                return resolved, true
        }
        return "", false
}

// prepareQueryExternalTables materializes the ephemeral tableDefinitions
// of a query and returns the default dataset id to forward to the
// engine. With no table definitions, or with no catalog client
// configured, it is a no-op that hands defaultDataset back unchanged.
// A non-nil error means the caller should emit an HTTP error
// (jobs.query) or record a failed job (jobs.insert).
func prepareQueryExternalTables(
        ctx context.Context,
        deps Dependencies,
        projectID string,
        tableDefs map[string]bqtypes.ExternalDataConfiguration,
        defaultDataset string,
) (string, error) {
        if len(tableDefs) == 0 {
                return defaultDataset, nil
        }
        if deps.Catalog == nil {
                return defaultDataset, nil
        }
        return external.PrepareTableDefinitionsWith(
                ctx,
                deps.Catalog,
                projectID,
                tableDefs,
                defaultDataset,
                externalResolver(deps),
        )
}

// writeExternalTableError reports a gateway-side external table failure
// on the synchronous query API as a 400 invalidQuery response. It
// returns true when err was non-nil and a response was written, and
// false (writing nothing) otherwise.
func writeExternalTableError(w http.ResponseWriter, err error) bool {
        failed := err != nil
        if failed {
                msg := "Could not prepare external table: " + err.Error()
                writeError(w, http.StatusBadRequest, reasonInvalidQuery, msg)
        }
        return failed
}

// insertExternalTable materializes an external table as part of an
// insert. An empty t.Type is first defaulted to externalTableType (this
// mutates t). It returns true on success and false when an error
// response has been written to w.
func insertExternalTable(
        w http.ResponseWriter,
        r *http.Request,
        deps Dependencies,
        projectID, datasetID, tableID string,
        t *bqtypes.Table,
) bool {
        if t.Type == "" {
                t.Type = externalTableType
        }
        target := external.Target{
                ProjectID: projectID,
                DatasetID: datasetID,
                TableID:   tableID,
                Schema:    t.Schema,
        }
        materializeErr := external.MaterializeWith(
                r.Context(), deps.Catalog, target, t.ExternalDataConfiguration, externalResolver(deps))
        wroteError := writeExternalTableInsertError(w, materializeErr)
        return !wroteError
}

// writeExternalTableInsertError reports an external table failure on
// tables.insert as a 400 response with the generic invalid reason. It
// returns true when err was non-nil and a response was written, and
// false (writing nothing) otherwise.
func writeExternalTableInsertError(w http.ResponseWriter, err error) bool {
        failed := err != nil
        if failed {
                msg := "Could not create external table: " + err.Error()
                writeError(w, http.StatusBadRequest, reasonInvalid, msg)
        }
        return failed
}

package handlers

import (
        "net/http"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// enginePolicyFederatedURL is the documentation path (with section
// anchor) that writeUnsupportedFederatedEnvelope cites in the message
// of its 501 responses.
const enginePolicyFederatedURL = "docs/ENGINE_POLICY.md#external-query-and-federated-sources"

func writeUnsupportedFederatedEnvelope(w http.ResponseWriter, feature string) {
        writeError(w, http.StatusNotImplemented, "notImplemented",
                feature+" is not supported by the BigQuery emulator (fixture-backed EXTERNAL_QUERY only; see "+
                        enginePolicyFederatedURL+").")
}

// rejectUnsupportedTablePosture checks t for table flavours the
// emulator does not support and, on the first match, writes a 501 and
// returns true. The checks run in this order: a BigLake configuration,
// object table options, then an external data configuration whose
// sourceFormat is OBJECT_TABLE (compared after trimming whitespace and
// upper-casing). A nil table, or one matching none of these, writes
// nothing and returns false.
func rejectUnsupportedTablePosture(w http.ResponseWriter, t *bqtypes.Table) bool {
        var feature string
        switch {
        case t == nil:
                // Nothing to inspect.
        case t.BiglakeConfiguration != nil:
                feature = "BigLake tables (biglakeConfiguration)"
        case t.ObjectTableOptions != nil:
                feature = "Object tables (objectTableOptions)"
        case t.ExternalDataConfiguration != nil:
                format := strings.TrimSpace(t.ExternalDataConfiguration.SourceFormat)
                if strings.ToUpper(format) == "OBJECT_TABLE" {
                        feature = "Object tables (OBJECT_TABLE sourceFormat)"
                }
        }
        if feature == "" {
                return false
        }
        writeUnsupportedFederatedEnvelope(w, feature)
        return true
}

// rejectUnsupportedDatasetPosture writes a 501 and returns true when ds
// carries an externalDatasetReference. A nil dataset, or one without
// that reference, writes nothing and returns false.
func rejectUnsupportedDatasetPosture(w http.ResponseWriter, ds *bqtypes.Dataset) bool {
        if ds != nil && ds.ExternalDatasetReference != nil {
                const feature = "External datasets (Spanner / Cloud SQL externalDatasetReference)"
                writeUnsupportedFederatedEnvelope(w, feature)
                return true
        }
        return false
}

// Package handlers contains HTTP handlers for the BigQuery REST surface.
//
// At this stage of the project most handlers are intentional stubs that
// return http.StatusNotImplemented. They exist so that:
//
//   - The route table in gateway/server.go is exhaustive and easy to scan,
//     which doubles as a checklist for the gateway-HTTP-surface section of
//     ROADMAP.md.
//   - Client libraries get a structurally-valid BigQuery error envelope
//     instead of a 404 when they hit something we have not implemented yet.
//   - Each handler can be flipped to a real implementation in isolation.
package handlers

import (
        "encoding/json"
        "net/http"
        "os"
        "regexp"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/external/sourceconfig"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/models"
        "github.com/vantaboard/bigquery-emulator/gateway/routines"
        "github.com/vantaboard/bigquery-emulator/gateway/session"
        "github.com/vantaboard/bigquery-emulator/gateway/snapshots"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

// Dependencies bundles everything a handler might need to reach (engine
// gRPC client, in-memory catalog, logger, etc.). It grows as the gateway
// wires in real backends.
//
// Catalog and Query are the engine-side gRPC clients defined in
// proto/emulator.proto; both are nil when the gateway is started with
// --engine_binary="" (gateway-only / unit-test mode) and handlers
// must nil-check before dispatching to them.
//
// The pointer-typed stores below each document what a nil value means
// (a no-op store or a per-handler fallback), so a partially populated
// Dependencies is a supported configuration.
type Dependencies struct {
        // Catalog is the gRPC client used by datasets/tables/tabledata
        // handlers to mirror catalog mutations into the engine. Nil in
        // gateway-only mode.
        Catalog enginepb.CatalogClient

        // Query is the gRPC client used by jobs.query and the query branch
        // of jobs.insert to forward SQL execution to the engine. Nil in
        // gateway-only mode.
        Query enginepb.QueryClient

        // Jobs is the in-memory job registry the synchronous jobs.query
        // handler records DONE jobs in, and that future jobs.get /
        // jobs.list handlers will read back from. When nil (legacy unit
        // tests that predate the registry), QueryRun lazily mints a
        // per-handler fallback so behavior stays compatible.
        Jobs *jobs.Registry

        // Metadata caches REST-only Dataset/Table fields the engine
        // does not yet persist (labels, defaultCollation, expirationTime,
        // rangePartitioning, clustering, ...). Insert/Patch/Update
        // populate it; Get reads it back and merges with the engine
        // response. Nil is treated as a no-op store so legacy unit
        // tests that do not opt in keep their echo posture.
        Metadata *MetadataStore

        // Snapshots retains deleted-table row captures for COPY jobs that
        // reference table@epoch decorators (undelete samples). Nil is a
        // no-op store.
        Snapshots *snapshots.Store

        // Routines is the in-memory UDF / TVF / procedure registry REST
        // handlers use for routines.* and DDL query jobs register into.
        // Nil is treated as a per-handler fallback store.
        Routines *routines.Store

        // Models is the in-memory BQML metadata registry REST handlers use
        // for models.* and CREATE MODEL DDL query jobs register into.
        // Nil is treated as a per-handler fallback store.
        Models *models.Store

        // Sessions is the in-memory BigQuery session registry used when
        // queries request createSession or pass connectionProperties
        // session_id. Nil is treated as a per-handler fallback store.
        Sessions *session.Store

        // DataDir is the engine persistent storage root (--data_dir). Used
        // to resolve external source fixture/local/live modes.
        DataDir string

        // DefaultDatasetID is the server-level fallback dataset used to
        // resolve unqualified (single-segment) table names when a query or
        // job does not carry its own `defaultDataset`. Mirrors setting
        // `default_dataset` on a production BigQuery client/job. Empty
        // means no fallback, so bare table names error exactly like
        // production BigQuery with no default dataset configured.
        DefaultDatasetID string

        // ExternalSources configures per-source fixture|local|live resolution.
        // Nil uses package defaults (GCS local, Sheets fixture).
        ExternalSources *sourceconfig.Config
}

// NewRoutineStore creates a new routine registry by delegating to
// routines.NewStore, for use when assembling gateway dependencies.
func NewRoutineStore() *routines.Store {
        store := routines.NewStore()
        return store
}

// NewModelStore creates a new model metadata registry by delegating to
// models.NewStore, for use when assembling gateway dependencies.
func NewModelStore() *models.Store {
        store := models.NewStore()
        return store
}

// NewSessionStore creates a new session registry by delegating to
// session.NewStore, for use when assembling gateway dependencies.
func NewSessionStore() *session.Store {
        store := session.NewStore()
        return store
}

// NewSnapshotStore creates a new table snapshot store by delegating to
// snapshots.NewStore, for use when assembling gateway dependencies.
func NewSnapshotStore() *snapshots.Store {
        store := snapshots.NewStore()
        return store
}

// Health is a minimal liveness endpoint, handy for `docker-compose`
// health checks and CI smoke tests. It always answers 200 with a small
// JSON object carrying the status and the service name; the request
// itself is not inspected.
func Health(w http.ResponseWriter, r *http.Request) {
        payload := map[string]string{
                "service": "bigquery-emulator",
                "status":  "ok",
        }
        writeJSON(w, http.StatusOK, payload)
}

// NotImplemented answers with a BigQuery-shaped 501 error. It is the
// handler for routes that are registered but have no implementation
// yet.
func NotImplemented(w http.ResponseWriter, r *http.Request) {
        const msg = "This BigQuery emulator route is registered but not yet implemented. " +
                "See ROADMAP.md."
        writeError(w, http.StatusNotImplemented, reasonNotImplemented, msg)
}

// NotFound is the catch-all handler for paths missing from the route
// table. It answers with a BigQuery-shaped 404 that names the request
// method and URL path, so client libraries see a structured error.
func NotFound(w http.ResponseWriter, r *http.Request) {
        route := r.Method + " " + r.URL.Path
        writeError(w, http.StatusNotFound, reasonNotFound, "No route matches "+route+".")
}

// splitColonOp splits an AIP-136 custom-method path segment of the form
// "{resource}:{op}" at its first colon and returns the two halves. When
// the segment contains no colon, the whole input is returned as the
// resource and op is empty. Any further colons stay inside op.
//
// This is how the emulator dispatches BigQuery REST custom methods like
// `datasets/{datasetId}:undelete` and `tables/{tableId}:getIamPolicy`,
// because Go's net/http mux cannot match a literal segment after a
// wildcard.
func splitColonOp(segment string) (resource, op string) {
        // strings.Cut yields (segment, "", false) when the separator is
        // absent, which is exactly the no-colon contract above.
        resource, op, _ = strings.Cut(segment, ":")
        return resource, op
}

// errorEnvelope matches the shape BigQuery returns for non-2xx responses:
// a single top-level "error" object.
// See https://cloud.google.com/bigquery/docs/reference/rest -> error format.
type errorEnvelope struct {
        Error errorBody `json:"error"`
}

// errorBody is the payload of the "error" object in an errorEnvelope.
// writeError fills Code with the HTTP status, Status with the reason
// string, and Errors with a single matching errorDetail. Errors and
// Status are omitted from the JSON when empty.
type errorBody struct {
        Code    int           `json:"code"`
        Message string        `json:"message"`
        Errors  []errorDetail `json:"errors,omitempty"`
        Status  string        `json:"status,omitempty"`
}

// errorDetail is one entry of errorBody.Errors: a reason code, a
// human-readable message, and an optional domain (writeError always
// sets the domain to "global").
type errorDetail struct {
        Reason  string `json:"reason"`
        Message string `json:"message"`
        Domain  string `json:"domain,omitempty"`
}

// writeJSON sends body as a JSON response: it sets the Content-Type
// header to UTF-8 JSON, writes the given status code, and then streams
// the encoded body to w.
func writeJSON(w http.ResponseWriter, status int, body any) {
        headers := w.Header()
        headers.Set("Content-Type", "application/json; charset=utf-8")
        w.WriteHeader(status)

        encoder := json.NewEncoder(w)
        // The status line has already been sent, so an encoding failure
        // can no longer be turned into a different response; the error is
        // deliberately discarded.
        _ = encoder.Encode(body)
}

// writeLegacySQLError reports a gateway/query legacy translation
// failure as a 400 invalidQuery response whose message is the error
// text itself. It returns true when err was non-nil and a response was
// written, and false (writing nothing) otherwise.
func writeLegacySQLError(w http.ResponseWriter, err error) bool {
        failed := err != nil
        if failed {
                writeError(w, http.StatusBadRequest, reasonInvalidQuery, err.Error())
        }
        return failed
}

func writeError(w http.ResponseWriter, status int, reason, msg string) {
        writeJSON(w, status, errorEnvelope{
                Error: errorBody{
                        Code:    status,
                        Message: msg,
                        Status:  reason,
                        Errors: []errorDetail{{
                                Reason:  reason,
                                Message: msg,
                                Domain:  "global",
                        }},
                },
        })
}

// grpcToHTTPError converts a gRPC error returned by the engine into the
// BigQuery-shaped JSON error envelope and writes it to w. It reports
// whether a response was written, so call sites can read
// `if grpcToHTTPError(w, err) { return }`. A nil err, or a status whose
// code is OK, writes nothing and yields false.
//
// The mapping mirrors the Storage→gRPC mapping in
// frontend/handlers/catalog.cc:
//
//   - NOT_FOUND → 404 notFound
//   - ALREADY_EXISTS → 409 duplicate
//   - INVALID_ARGUMENT → 400 invalid
//   - FAILED_PRECONDITION → 400 failedPrecondition
//   - PERMISSION_DENIED → 403 accessDenied
//   - UNAUTHENTICATED → 401 authError
//   - UNIMPLEMENTED → 501 notImplemented
//   - UNAVAILABLE → 503 backendError
//   - DEADLINE_EXCEEDED → 504 backendError
//   - RESOURCE_EXHAUSTED → 429 quotaExceeded
//
// Every other code, and any error that is not a gRPC status at all, is
// reported as 500 internalError so a misbehaving engine cannot be
// mistaken for a 404 on the wire.
//
// The status message is rewritten into BigQuery's canonical shape via
// bqStyleMessage so client-side assertions like
// `expect(err.message).to.include('Not found')` and
// `expect(err.message).to.include('Already Exists')` match the live
// surface.
func grpcToHTTPError(w http.ResponseWriter, err error) bool {
        if err == nil {
                return false
        }
        st, isStatus := status.FromError(err)
        if !isStatus {
                // Plain Go error: surface the raw text under a generic 500.
                writeError(w, http.StatusInternalServerError, reasonInternalError,
                        "Engine RPC failed: "+err.Error())
                return true
        }
        // respond writes the envelope for the chosen status/reason pair
        // and reports that a response was produced.
        respond := func(httpStatus int, reason string) bool {
                writeError(w, httpStatus, reason, bqStyleMessage(st.Message()))
                return true
        }
        switch st.Code() {
        case codes.OK:
                return false
        case codes.NotFound:
                return respond(http.StatusNotFound, reasonNotFound)
        case codes.AlreadyExists:
                return respond(http.StatusConflict, reasonDuplicate)
        case codes.InvalidArgument:
                return respond(http.StatusBadRequest, reasonInvalid)
        case codes.FailedPrecondition:
                return respond(http.StatusBadRequest, reasonFailedPrecondition)
        case codes.PermissionDenied:
                return respond(http.StatusForbidden, reasonAccessDenied)
        case codes.Unauthenticated:
                // The emulator never authenticates so this is unlikely, but
                // map it so a buggy engine doesn't crash through to 500.
                return respond(http.StatusUnauthorized, reasonAuthError)
        case codes.Unimplemented:
                return respond(http.StatusNotImplemented, reasonNotImplemented)
        case codes.Unavailable:
                return respond(http.StatusServiceUnavailable, reasonBackendError)
        case codes.DeadlineExceeded:
                return respond(http.StatusGatewayTimeout, reasonBackendError)
        case codes.ResourceExhausted:
                return respond(http.StatusTooManyRequests, reasonQuotaExceeded)
        default:
                return respond(http.StatusInternalServerError, reasonInternalError)
        }
}

// bqInvalidTimestampStringMsg is BigQuery's wire message for rejected
// TIMESTAMP parameter / wire-string values (see params_timestamp_reject).
// bqStyleMessage returns it in place of engine messages that begin
// with one of the "semantic: invalid TIMESTAMP ..." prefixes.
const bqInvalidTimestampStringMsg = "Invalid timestamp string"

// notFoundResourceRE / alreadyExistsResourceRE match the engine's
// canonical storage-layer error strings produced by DuckDBStorage
// (see backend/storage/duckdb/duckdb_storage.cc): "<noun> not found:
// <project>.<dataset>[.<table>]" and "<noun> already exists:
// <project>.<dataset>[.<table>]" where <noun> is "table" or
// "dataset". The resource path uses `.` between every segment on the
// engine side; BigQuery REST uses `:` between project and dataset and
// `.` between dataset and table. The captured suffix is rewritten to
// the REST shape and the noun is capitalised so client assertions for
// "Not found" / "Already Exists" prefixes (live BigQuery's canonical
// shape) match.
//
// Capture groups, identical in both patterns: 1 = noun, 2 = project,
// 3 = dataset, 4 = table (empty when the path has only two segments).
// Both patterns are anchored with ^...$ and no segment may contain a
// `.`, so a message with extra text or more than three segments does
// not match.
var (
        notFoundResourceRE = regexp.MustCompile(
                `^(table|dataset) not found: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
        alreadyExistsResourceRE = regexp.MustCompile(
                `^(table|dataset) already exists: ([^.]+)\.([^.]+)(?:\.([^.]+))?$`)
)

// bqStyleMessage translates the handful of engine-side messages the
// gateway recognises into the wording BigQuery uses on the wire:
//
//   - "table not found: dev.foo.bar"      -> "Not found: Table dev:foo.bar"
//   - "dataset not found: dev.foo"        -> "Not found: Dataset dev:foo"
//   - "table already exists: dev.foo.bar" -> "Already Exists: Table dev:foo.bar"
//   - "dataset already exists: dev.foo"   -> "Already Exists: Dataset dev:foo"
//   - messages starting with "semantic: invalid TIMESTAMP parameter value "
//     or "semantic: invalid TIMESTAMP value " -> bqInvalidTimestampStringMsg
//
// The storage patterns must match the whole message (the regexes are
// anchored), so an embedded occurrence is not rewritten. Every other
// message is returned unchanged, which keeps analysis errors and the
// like in their engine-side wording.
func bqStyleMessage(msg string) string {
        // Try the "not found" shape first, then "already exists"; the
        // capture groups line up in both, so one rewrite call serves
        // whichever matched.
        groups, verb := notFoundResourceRE.FindStringSubmatch(msg), "Not found"
        if groups == nil {
                groups, verb = alreadyExistsResourceRE.FindStringSubmatch(msg), "Already Exists"
        }
        if groups != nil {
                return bqStyleResourceMessage(verb, groups[1], groups[2], groups[3], groups[4])
        }
        for _, prefix := range [...]string{
                "semantic: invalid TIMESTAMP parameter value ",
                "semantic: invalid TIMESTAMP value ",
        } {
                if strings.HasPrefix(msg, prefix) {
                        return bqInvalidTimestampStringMsg
                }
        }
        return msg
}

// bqStyleResourceMessage builds "<verb>: <Noun> <project>:<dataset>[.<table>]".
// The ".<table>" suffix is left off when table is empty. The nouns
// "table" and "dataset" are capitalised; any other noun is emitted
// exactly as given, so an unexpected noun still produces a rewritten
// message rather than being dropped.
func bqStyleResourceMessage(verb, noun, project, dataset, table string) string {
        label := noun
        switch noun {
        case "table":
                label = "Table"
        case "dataset":
                label = "Dataset"
        }
        path := project + ":" + dataset
        if table != "" {
                path = path + "." + table
        }
        return verb + ": " + label + " " + path
}

// requestEmulatorBaseURL returns the absolute emulator REST origin for
// resumable upload Location headers (scheme + host, no trailing slash).
//
// Resolution order:
//
//  1. The BIGQUERY_EMULATOR_HOST environment variable, when it holds
//     anything besides whitespace and slashes. Trailing slashes are
//     removed and "http://" is prepended when the value carries no
//     scheme (a leading "//" is dropped first).
//  2. The request itself: the scheme is "https" when r.TLS is set and
//     "http" otherwise, and the host is r.Host. The first
//     comma-separated element of X-Forwarded-Proto / X-Forwarded-Host
//     overrides the scheme / host, but only when that element is
//     non-empty.
//
// An empty string is returned when r is nil or no host can be
// determined. The function never returns an origin with an empty
// scheme or an empty host.
func requestEmulatorBaseURL(r *http.Request) string {
        envHost := strings.TrimRight(strings.TrimSpace(os.Getenv("BIGQUERY_EMULATOR_HOST")), "/")
        // A value made only of slashes trims down to nothing; treat it as
        // unset instead of returning the bare "http://".
        if envHost != "" {
                if !strings.Contains(envHost, "://") {
                        envHost = "http://" + strings.TrimPrefix(envHost, "//")
                }
                return envHost
        }
        if r == nil {
                return ""
        }
        scheme := "http"
        if r.TLS != nil {
                scheme = "https"
        }
        // Forwarded headers may list one value per proxy hop; the first
        // element is the one used. A malformed header whose first element
        // is empty (e.g. ",https") is ignored so the origin never loses
        // its scheme or a usable r.Host.
        if first, _, _ := strings.Cut(r.Header.Get("X-Forwarded-Proto"), ","); strings.TrimSpace(first) != "" {
                scheme = strings.TrimSpace(first)
        }
        host := r.Host
        if first, _, _ := strings.Cut(r.Header.Get("X-Forwarded-Host"), ","); strings.TrimSpace(first) != "" {
                host = strings.TrimSpace(first)
        }
        if host == "" {
                return ""
        }
        return scheme + "://" + host
}

package handlers

import (
        "encoding/json"
        "io"
        "net/http"
        "strconv"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/load"
        "github.com/vantaboard/bigquery-emulator/gateway/middleware"
        "github.com/vantaboard/bigquery-emulator/gateway/query"
)

// jobListKind is the value the BigQuery REST API returns for the
// `kind` field of a JobList response. See
// docs/bigquery/docs/reference/rest/v2/jobs/list.md.
const jobListKind = "bigquery#jobList"

// jobCancelKind is the value of `kind` on a JobCancelResponse, the
// envelope `jobs.cancel` returns. The body wraps the updated Job.
// See docs/bigquery/docs/reference/rest/v2/jobs/cancel.md.
const jobCancelKind = "bigquery#jobCancelResponse"

// The jobConfigurationKind* constants are the values of
// `configuration.jobType`, one per supported job flavour. The wire
// schema spells the type discriminator in upper-case (QUERY / LOAD /
// COPY / EXTRACT). A value posted by the caller is kept as-is; when
// the caller leaves the field empty, newPendingJob stamps the constant
// matching the populated configuration (falling back to QUERY) so a
// subsequent `jobs.get` doesn't lose the discriminator.
const (
        jobConfigurationKindQuery   = "QUERY"
        jobConfigurationKindLoad    = "LOAD"
        jobConfigurationKindCopy    = "COPY"
        jobConfigurationKindExtract = "EXTRACT"
)

// queryParamTrue is the wire literal BigQuery's REST surface uses for
// boolean query parameters (e.g. `allUsers=true`, `deleteContents=true`).
// Promoted to a constant so the goconst lint counter does not flag
// the repeated literal across handlers. Comparisons against it are
// exact string matches, so spellings such as "True" or "1" are not
// treated as true.
const queryParamTrue = "true"

// JobList implements `bigquery.jobs.list`:
//
//        GET /bigquery/v2/projects/{projectId}/jobs
//
// The handler reads the query parameters `allUsers`, `maxResults`,
// `minCreationTime`, `maxCreationTime`, `pageToken`, `stateFilter`
// (repeatable) and `parentJobId`, and hands everything except
// `allUsers` to `Registry.ListByProject` as a `jobs.ListOptions`.
// `allUsers=true` is rejected with a 501 because the emulator has no
// auth context to scope cross-user listings to. `projection` is not
// read: each entry is whatever the registry returns for the job.
//
// The response carries `kind` and `jobs`, plus `nextPageToken` when
// the registry reports another page. A nil deps.Jobs is replaced with
// a fresh registry when the handler is built.
func JobList(deps Dependencies) http.HandlerFunc {
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }
        return func(w http.ResponseWriter, r *http.Request) {
                projectID := r.PathValue("projectId")
                params := r.URL.Query()
                if params.Get("allUsers") == queryParamTrue {
                        const unsupported = "jobs.list with allUsers=true is not supported; " +
                                "the emulator has no auth context to scope cross-user " +
                                "listings."
                        writeError(w, http.StatusNotImplemented, reasonNotImplemented, unsupported)
                        return
                }

                // Numeric parameters arrive as unsigned decimals and are
                // saturated into the signed types ListOptions uses.
                var opts jobs.ListOptions
                opts.MaxResults = clampToInt(parseUintQuery(params, "maxResults", 0))
                opts.PageToken = params.Get("pageToken")
                opts.ParentJobID = params.Get("parentJobId")
                opts.MinCreationTime = clampToInt64(parseUintQuery(params, "minCreationTime", 0))
                opts.MaxCreationTime = clampToInt64(parseUintQuery(params, "maxCreationTime", 0))
                opts.StateFilter = params["stateFilter"]

                page, next := deps.Jobs.ListByProject(projectID, opts)
                payload := map[string]any{
                        resourceKeyKind: jobListKind,
                        "jobs":          page,
                }
                if next != "" {
                        payload["nextPageToken"] = next
                }
                writeJSON(w, http.StatusOK, payload)
        }
}

// JobInsert implements `bigquery.jobs.insert` (metadata-only variant):
//
//        POST /bigquery/v2/projects/{projectId}/jobs
//
// The body is a Job resource whose `configuration` carries exactly the
// section that selects the job flavour. The handler itself only
// validates and dispatches:
//
//  1. The body is read in full and, when non-empty, decoded as JSON
//     into a jobs.Job. A read or decode failure is a 400 `invalid`.
//  2. A missing `configuration` (which includes an empty body) is a
//     400 `invalid`.
//  3. The first populated section, checked in the order query, load,
//     copy, extract, picks the runner: runSyncQueryInsert,
//     runSyncLoadInsert, runSyncCopyInsert or runSyncExtractInsert.
//     A configuration with none of them is a 501 `notImplemented`.
//
// Query jobs need deps.Query; when it is nil the request is handed to
// NotImplemented instead of a runner. The runners own everything that
// follows, including registering the job and writing the response. A
// nil deps.Jobs is replaced with a fresh registry when the handler is
// built.
func JobInsert(deps Dependencies) http.HandlerFunc {
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }
        return func(w http.ResponseWriter, r *http.Request) {
                raw, readErr := io.ReadAll(r.Body)
                if readErr != nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid,
                                "Could not read job request body: "+readErr.Error())
                        return
                }
                var posted jobs.Job
                if len(raw) != 0 {
                        if parseErr := json.Unmarshal(raw, &posted); parseErr != nil {
                                writeError(w, http.StatusBadRequest, reasonInvalid,
                                        "Could not parse job request body as JSON: "+parseErr.Error())
                                return
                        }
                }
                cfg := posted.Configuration
                if cfg == nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid,
                                "Job configuration is required.")
                        return
                }
                if cfg.Query != nil {
                        // Without a query client there is nothing to execute on.
                        if deps.Query == nil {
                                NotImplemented(w, r)
                                return
                        }
                        runSyncQueryInsert(deps, w, r, &posted, cfg)
                        return
                }
                if cfg.Load != nil {
                        runSyncLoadInsert(deps, w, r, &posted, cfg)
                        return
                }
                if cfg.Copy != nil {
                        runSyncCopyInsert(deps, w, r, &posted, cfg)
                        return
                }
                if cfg.Extract != nil {
                        runSyncExtractInsert(deps, w, r, &posted, cfg)
                        return
                }
                writeError(w, http.StatusNotImplemented, reasonNotImplemented,
                        "jobs.insert: configuration must include query, load, copy, or extract.")
        }
}

// JobInsertUpload implements `bigquery.jobs.insert` (media-upload variant):
//
//        POST /upload/bigquery/v2/projects/{projectId}/jobs
//        PUT  /upload/bigquery/v2/projects/{projectId}/jobs
//
// Clients select the flavour via `?uploadType=multipart` or
// `?uploadType=resumable`. This handler only routes by HTTP method:
// POST goes to handleJobInsertUploadPost, PUT goes to
// handleJobInsertUploadPut, and any other method is answered with 405.
// Both helpers receive the upload store obtained from
// load.DefaultUploadStore when the handler is built. A nil deps.Jobs
// is replaced with a fresh registry at the same time.
func JobInsertUpload(deps Dependencies) http.HandlerFunc {
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }
        store := load.DefaultUploadStore()
        return func(w http.ResponseWriter, r *http.Request) {
                if r.Method == http.MethodPost {
                        handleJobInsertUploadPost(deps, store, w, r)
                        return
                }
                if r.Method == http.MethodPut {
                        handleJobInsertUploadPut(deps, store, w, r)
                        return
                }
                writeError(w, http.StatusMethodNotAllowed, reasonInvalid,
                        "jobs.insert upload supports POST and PUT only")
        }
}

// JobGet implements `bigquery.jobs.get`:
//
//        GET /bigquery/v2/projects/{projectId}/jobs/{jobId}
//
// The job is looked up in `deps.Jobs` by jobId and written back as
// stored with HTTP 200. A BigQuery-shaped 404 is returned when:
//
//   - the registry has no entry for the jobId,
//   - the entry's jobReference.projectId differs from the URL path, or
//   - the `location` query parameter is set, the stored jobReference
//     has a location too, and the two differ. In this case the 404
//     message names the requested location.
//
// A job stored without a location matches any requested location. A
// nil deps.Jobs is replaced with a fresh registry when the handler is
// built.
func JobGet(deps Dependencies) http.HandlerFunc {
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")
                job, found := deps.Jobs.Get(jobID)
                if !found || job.JobReference.ProjectID != projectID {
                        writeJobNotFound(w, projectID, jobID, "")
                        return
                }
                wantLoc := r.URL.Query().Get("location")
                haveLoc := job.JobReference.Location
                if wantLoc != "" && haveLoc != "" && wantLoc != haveLoc {
                        writeJobNotFound(w, projectID, jobID, wantLoc)
                        return
                }
                writeJSON(w, http.StatusOK, job)
        }
}

// JobCancel implements `bigquery.jobs.cancel`:
//
//        POST /bigquery/v2/projects/{projectId}/jobs/{jobId}/cancel
//
// On success the response is a `JobCancelResponse`: `kind` plus the
// job value returned by the registry's Cancel call. What cancelling
// does to the stored job (state, cancel-requested flag) is decided by
// that registry method, not by this handler.
//
// A BigQuery-shaped 404 is returned when the jobId is unknown, when
// the stored jobReference belongs to a different project than the URL
// path, or when Cancel itself reports that the job is gone. A nil
// deps.Jobs is replaced with a fresh registry when the handler is
// built.
func JobCancel(deps Dependencies) http.HandlerFunc {
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")
                notFound := func() { writeJobNotFound(w, projectID, jobID, "") }

                existing, found := deps.Jobs.Get(jobID)
                if !found || existing.JobReference.ProjectID != projectID {
                        notFound()
                        return
                }
                cancelled, done := deps.Jobs.Cancel(jobID)
                if !done {
                        notFound()
                        return
                }
                writeJSON(w, http.StatusOK, map[string]any{
                        resourceKeyKind: jobCancelKind,
                        "job":           cancelled,
                })
        }
}

// JobDelete implements `bigquery.jobs.delete`:
//
//        DELETE /bigquery/v2/projects/{projectId}/jobs/{jobId}/delete
//
// The literal `/delete` suffix is the upstream URL template, not a
// typo (see docs/bigquery/docs/reference/rest/v2/jobs/delete.md).
//
// The handler confirms the job exists under the project named in the
// URL and then asks the registry to delete it. It answers 204 with no
// body on success. A BigQuery-shaped 404 is returned when the jobId is
// unknown, when the stored jobReference belongs to another project,
// or when the registry's Delete reports that nothing was removed. A
// nil deps.Jobs is replaced with a fresh registry when the handler is
// built.
func JobDelete(deps Dependencies) http.HandlerFunc {
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")
                notFound := func() { writeJobNotFound(w, projectID, jobID, "") }

                existing, found := deps.Jobs.Get(jobID)
                if !found || existing.JobReference.ProjectID != projectID {
                        notFound()
                        return
                }
                if removed := deps.Jobs.Delete(jobID); !removed {
                        notFound()
                        return
                }
                w.WriteHeader(http.StatusNoContent)
        }
}

// clampToInt narrows a uint64 wire value (for example a maxResults
// query parameter parsed as an unsigned decimal) to Go's
// platform-native int. Values that fit are returned unchanged; larger
// values saturate at maxInt. Keeping the bounds check here means
// handlers do not each need their own guard for the gosec G115
// narrowing lint.
func clampToInt(v uint64) int {
        if v <= uint64(maxInt) {
                return int(v)
        }
        return maxInt
}

// clampToInt64 narrows a uint64 to int64, saturating at int64Max for
// values that do not fit. The callers in this file pass creation-time
// bounds, which BigQuery's REST surface documents as milliseconds
// since the epoch, so the saturating branch is not expected in
// practice; the explicit guard keeps the gosec G115 lint clean.
func clampToInt64(v uint64) int64 {
        if v <= uint64(int64Max) {
                return int64(v)
        }
        return int64Max
}

// maxInt and int64Max are platform constants used by the clamp
// helpers above. Spelled out here (instead of importing `math`) to
// keep the import surface minimal for the few callers that need
// them.
//
// Both are computed the same way: take the all-ones value of the
// unsigned type and shift right by one, which clears the top bit and
// leaves the largest value the signed type of the same width can
// hold. maxInt therefore follows the platform's int width, while
// int64Max is always 2^63-1.
const (
        maxInt   = int(^uint(0) >> 1)
        int64Max = int64(^uint64(0) >> 1)
)

// millisString renders t the way BigQuery puts timestamps on the
// wire: whole milliseconds since the Unix epoch, as a base-10 string.
// Sub-millisecond precision is dropped and instants before the epoch
// come out with a leading minus sign. finalizeFailedJobWithReason uses
// it for the job's startTime / endTime statistics.
func millisString(t time.Time) string {
        const decimal = 10
        ms := t.UnixMilli()
        return strconv.FormatInt(ms, decimal)
}

// writeJobNotFound writes the 404 notFound envelope the job handlers
// return when a job cannot be served. The message reads
// "Not found: Job <projectID>:<jobID>"; a non-empty location adds
// " in location <location>" so a wrong-region lookup can be told apart
// from a job that does not exist at all.
func writeJobNotFound(w http.ResponseWriter, projectID, jobID, location string) {
        where := ""
        if location != "" {
                where = " in location " + location
        }
        writeError(w, http.StatusNotFound, reasonNotFound,
                "Not found: Job "+projectID+":"+jobID+where)
}

// runSyncQueryInsert is the query branch of `JobInsert`, pulled out of
// the handler closure so the handler stays a thin dispatch switch
// (cyclop / funlen caps).
//
// Two cases are delegated before any job is registered: a dry-run
// configuration goes to runSyncQueryDryRunInsert and a multi-statement
// script goes to runSyncScriptQueryInsert. Otherwise the function
// registers a PENDING job (newPendingJob) and then, in order:
//
//  1. prepares external table definitions and the default dataset;
//  2. short-circuits an abort-session statement to a DONE job with no
//     results;
//  3. expands query parameters and prepares the SQL for the engine;
//  4. executes the query and drains the result stream;
//  5. appends the rows to the configured destination, persists
//     destination metadata, and runs the routine / model / view DDL
//     follow-ups;
//  6. optionally materializes an implicit destination table;
//  7. resolves session info and stamps the job DONE (finalizeDoneJob).
//
// Every failure after registration is recorded on the job through
// finalizeFailedJob and the job is still returned with HTTP 200; this
// function never writes an HTTP error envelope itself. The response
// body is always the Job, never row data.
//
//nolint:funlen // mirrors runQueryExecute; abort-session + external-table branches add statements
func runSyncQueryInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
        posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
        // Delegated flavours: these paths create their own job.
        if cfg.DryRun {
                runSyncQueryDryRunInsert(deps, w, r, posted, cfg)
                return
        }
        if isMultiStatementScript(cfg.Query.Query) {
                runSyncScriptQueryInsert(deps, w, r, posted, cfg)
                return
        }
        projectID := r.PathValue("projectId")
        job := newPendingJob(deps, projectID, posted, cfg)
        job.UserEmail = principalEmailFromContext(r)

        // useLegacySql is optional on the wire; absent means false here.
        useLegacy := false
        if cfg.Query.UseLegacySQL != nil {
                useLegacy = *cfg.Query.UseLegacySQL
        }
        // prepareQueryExternalTables may replace the default dataset it
        // was given, so its result supersedes the resolved one.
        defaultDataset := resolveDefaultDataset(deps, cfg.Query.DefaultDataset)
        defaultDataset, extErr := prepareQueryExternalTables(
                r.Context(), deps, projectID, cfg.Query.TableDefinitions, defaultDataset)
        if extErr != nil {
                start := time.Now().UTC()
                finalizeFailedJob(deps, job, start, extErr)
                writeJSON(w, http.StatusOK, job)
                return
        }
        // An abort-session statement never reaches the engine: the job is
        // finished immediately with identical start and end times and no
        // schema, rows or statistics.
        if parseAbortSessionSQL(cfg.Query.Query) {
                start := time.Now().UTC()
                end := start
                sessionInfo := sessionStore(&deps).Resolve(
                        projectID, posted.JobReference.Location, false, cfg.Query.ConnectionProperties)
                finalizeDoneJob(deps, job, start, end, nil, nil, nil, "", "", nil, nil, sessionInfo, r)
                writeJSON(w, http.StatusOK, job)
                return
        }
        // Parameter handling happens on the caller's SQL first; the
        // parameters still sent to the engine are whatever
        // stripExpandedArrayParams leaves after comparing the original
        // and expanded text.
        sql := expandQueryParamsInSQL(cfg.Query.Query, cfg.Query.QueryParameters)
        bindParams := stripExpandedArrayParams(cfg.Query.Query, sql, cfg.Query.QueryParameters)
        sql, sqlErr := query.PrepareEngineSQL(useLegacy, sql, projectID, defaultDataset)
        if sqlErr != nil {
                start := time.Now().UTC()
                finalizeFailedJob(deps, job, start, sqlErr)
                writeJSON(w, http.StatusOK, job)
                return
        }
        sql, sqlErr = query.PrepareEngineSQLForJobs(r.Context(), deps.Catalog, deps.Jobs, projectID, sql)
        if sqlErr != nil {
                start := time.Now().UTC()
                finalizeFailedJob(deps, job, start, sqlErr)
                writeJSON(w, http.StatusOK, job)
                return
        }
        // NOTE(review): UseLegacySql is always false on the engine request
        // even when the caller asked for legacy SQL; presumably
        // PrepareEngineSQL has already translated it — confirm.
        engineReq := &enginepb.QueryRequest{
                ProjectId:        projectID,
                DefaultDatasetId: defaultDataset,
                Sql:              sql,
                UseLegacySql:     false,
                Parameters:       parametersToEngineMap(bindParams),
                PrincipalEmail:   principalEmailFromContext(r),
        }

        // From here on `start` marks the beginning of engine execution
        // and is the start time recorded for both success and failure.
        start := time.Now().UTC()
        stream, err := deps.Query.ExecuteQuery(r.Context(), engineReq)
        if err != nil {
                finalizeFailedJob(deps, job, start, err)
                writeJSON(w, http.StatusOK, job)
                return
        }
        schema, dmlStats, rows, statementType, emulatorRoute, emulatorPhases, streamErr := drainSyncStream(stream)
        if streamErr != nil {
                finalizeFailedJob(deps, job, start, streamErr)
                writeJSON(w, http.StatusOK, job)
                return
        }
        restSchema := schemaFromProto(schema)
        if err := query.AppendResults(r.Context(), deps.Catalog, cfg.Query, projectID, restSchema, rows); err != nil {
                finalizeFailedJob(deps, job, start, err)
                writeJSON(w, http.StatusOK, job)
                return
        }
        query.PersistDestinationMetadata(deps.Metadata, cfg.Query, projectID)
        // DDL follow-ups work from the caller's original SQL text
        // (cfg.Query.Query), not the prepared engine SQL. Only routine
        // DDL yields a target reference for the finished job.
        var ddlTarget *bqtypes.RoutineReference
        if statementType == "CREATE_FUNCTION" || statementType == "CREATE_PROCEDURE" ||
                statementType == "CREATE_TABLE_FUNCTION" {
                ddlTarget = persistRoutineFromDDL(
                        r.Context(), &deps, projectID, defaultDataset, cfg.Query.Query)
        }
        if isCreateModelSQL(cfg.Query.Query) {
                persistModelFromDDL(r.Context(), &deps, projectID, defaultDataset, cfg.Query.Query)
        }
        handleViewDDLAfterQuery(&deps, projectID, defaultDataset, cfg.Query.Query, statementType)
        // A SELECT (or a statement with no reported type) that returned
        // rows and named no destination gets an implicit destination
        // table. This is best-effort: on error the job simply keeps no
        // destination table and still completes successfully.
        if cfg.Query.DestinationTable == nil && deps.Catalog != nil && len(rows) > 0 &&
                (statementType == "" || statementType == statementTypeSelect) {
                if dest, err := query.MaterializeImplicitDestination(
                        r.Context(), deps.Catalog, projectID, defaultDataset,
                        job.JobReference.JobID, restSchema, rows); err == nil {
                        cfg.Query.DestinationTable = dest
                        job.Configuration.Query.DestinationTable = dest
                }
        }
        // `end` is taken after all post-processing, so the recorded
        // duration covers the follow-up work as well as the engine call.
        end := time.Now().UTC()
        sessionInfo := sessionStore(&deps).Resolve(
                projectID, posted.JobReference.Location,
                queryJobCreateSession(cfg), queryJobConnectionProperties(cfg))
        finalizeDoneJob(
                deps,
                job,
                start,
                end,
                schema,
                dmlStats,
                rows,
                statementType,
                emulatorRoute,
                emulatorPhases,
                ddlTarget,
                sessionInfo,
                r,
        )
        writeJSON(w, http.StatusOK, job)
}

// runSyncQueryDryRunInsert serves jobs.insert requests whose
// configuration has dryRun set. A job is registered through
// newPendingJob, the SQL is prepared the same way as for a real run
// (external tables, then query.PrepareEngineSQL) and sent to
// enginepb.Query.DryRun instead of being executed.
//
// On success the job is marked DONE and jobs.ApplyDryRunStatistics
// records the engine's estimated bytes processed together with the
// start and end of the DryRun call. Any failure along the way is
// stored on the job via finalizeFailedJob. In both cases the job is
// returned with HTTP 200.
func runSyncQueryDryRunInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
        posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
        projectID := r.PathValue("projectId")
        job := newPendingJob(deps, projectID, posted, cfg)

        // reply returns the job in its current state; fail first records
        // cause on the job, using since as the job's start time.
        reply := func() { writeJSON(w, http.StatusOK, job) }
        fail := func(since time.Time, cause error) {
                finalizeFailedJob(deps, job, since, cause)
                reply()
        }

        // useLegacySql is optional on the wire; absent means false.
        legacy := cfg.Query.UseLegacySQL != nil && *cfg.Query.UseLegacySQL

        dataset := resolveDefaultDataset(deps, cfg.Query.DefaultDataset)
        dataset, extErr := prepareQueryExternalTables(
                r.Context(), deps, projectID, cfg.Query.TableDefinitions, dataset)
        if extErr != nil {
                fail(time.Now().UTC(), extErr)
                return
        }
        engineSQL, sqlErr := query.PrepareEngineSQL(legacy, cfg.Query.Query, projectID, dataset)
        if sqlErr != nil {
                fail(time.Now().UTC(), sqlErr)
                return
        }

        request := &enginepb.QueryRequest{
                ProjectId:        projectID,
                DefaultDatasetId: dataset,
                Sql:              engineSQL,
                UseLegacySql:     false,
                Parameters:       parametersToEngineMap(cfg.Query.QueryParameters),
                PrincipalEmail:   principalEmailFromContext(r),
        }

        // The statistics window brackets the engine call only.
        began := time.Now().UTC()
        estimate, dryErr := deps.Query.DryRun(r.Context(), request)
        finished := time.Now().UTC()
        if dryErr != nil {
                fail(began, dryErr)
                return
        }
        job.Status.State = jobs.JobStateDone
        jobs.ApplyDryRunStatistics(job, estimate.GetEstimatedBytesProcessed(), began, finished)
        reply()
}

// newPendingJob builds a PENDING job from an inbound `jobs.insert`
// body, registers it in deps.Jobs and returns the pointer the rest of
// the flow stamps results onto.
//
//   - The project always comes from projectID (the URL path); any
//     projectId in the posted jobReference is ignored.
//   - A caller-supplied jobId is kept verbatim; otherwise the registry
//     mints one. The posted location is copied as-is.
//   - When cfg.JobType is empty it is filled in on cfg itself: LOAD,
//     COPY or EXTRACT when the matching section is present (checked in
//     that order), QUERY otherwise.
//   - The job's ID is "<projectID>:<jobID>" and its creation time is
//     taken from nowMillis.
//
// cfg is stored on the job by reference, not copied.
func newPendingJob(deps Dependencies, projectID string, posted *jobs.Job, cfg *jobs.JobConfiguration) *jobs.Job {
        ref := bqtypes.JobReference{
                ProjectID: projectID,
                JobID:     posted.JobReference.JobID,
                Location:  posted.JobReference.Location,
        }
        if ref.JobID == "" {
                ref.JobID = deps.Jobs.NewJobID()
        }

        if cfg.JobType == "" {
                if cfg.Load != nil {
                        cfg.JobType = jobConfigurationKindLoad
                } else if cfg.Copy != nil {
                        cfg.JobType = jobConfigurationKindCopy
                } else if cfg.Extract != nil {
                        cfg.JobType = jobConfigurationKindExtract
                } else {
                        cfg.JobType = jobConfigurationKindQuery
                }
        }

        pending := &jobs.Job{
                Kind:          jobs.JobKind,
                ID:            projectID + ":" + ref.JobID,
                JobReference:  ref,
                Status:        jobs.Status{State: jobs.JobStatePending},
                Statistics:    jobs.Statistics{CreationTime: nowMillis()},
                Configuration: cfg,
        }
        deps.Jobs.Register(pending)
        return pending
}

// finalizeFailedJob marks a query job as DONE with an errorResult
// built from err, using the invalidQuery reason, and records the
// start / end timestamps. The work is done by
// finalizeFailedJobWithReason; the Dependencies argument is unused.
//
// The error text is passed through bqStyleMessage, which only
// rewrites the few messages it recognises (storage not-found /
// already-exists and invalid TIMESTAMP values). Everything else,
// including position-tagged analyzer errors such as
// "Unrecognized name: x [at 1:8]", reaches the client unchanged, so
// the row / column markers upstream samples assert on survive.
//
// `Status.Errors` is deliberately left nil. The upstream `jobs.insert`
// contract returns the job synchronously with a status the caller
// inspects or polls, and the official BigQuery Node client turns any
// non-nil `status.errors` array into a thrown `ApiError` (see
// `@google-cloud/bigquery/src/bigquery.ts` -> createJob). Reporting
// the single terminal error through `errorResult` keeps an engine
// analysis failure a Job-with-error the caller can examine.
func finalizeFailedJob(_ Dependencies, job *jobs.Job, start time.Time, err error) {
        reason := reasonInvalidQuery
        finalizeFailedJobWithReason(job, start, err, reason)
}

// finalizeFailedDataPlaneJob marks a load / copy / extract job as DONE
// with an `errorResult` carrying reason "invalid", so Node/Python
// clients surface the parser/fetch message rather than a generic
// transport error.
func finalizeFailedDataPlaneJob(job *jobs.Job, start time.Time, err error) {
        reason := reasonInvalid
        finalizeFailedJobWithReason(job, start, err, reason)
}

// finalizeFailedJobWithReason moves job to the DONE state with an
// `errorResult` made of reason and the bqStyleMessage-formatted text of
// err, then stamps StartTime from start and EndTime from the current
// UTC time. err must be non-nil: its Error method is called
// unconditionally.
func finalizeFailedJobWithReason(job *jobs.Job, start time.Time, err error, reason string) {
        finishedAt := time.Now().UTC()

        status := &job.Status
        status.State = jobs.JobStateDone
        status.ErrorResult = &bqtypes.ErrorProto{
                Reason:  reason,
                Message: bqStyleMessage(err.Error()),
        }

        stats := &job.Statistics
        stats.StartTime = millisString(start)
        stats.EndTime = millisString(finishedAt)
}

// finalizeDoneJob records successful completion on a PENDING job and
// attaches the streamed result to it, so a later
// `jobs.getQueryResults` can replay the same schema + rows without
// re-executing.
//
// It sets the DONE state, the start/end timestamps and a zero
// TotalBytesProcessed, stamps session info and the query destination,
// and stores a QueryResult on job.Result. The emulator debug fields
// (route and phase timings) are only exposed when the request context
// is a loopback caller, mirroring the gating `QueryRun` applies.
// `Statistics.Query` is populated only when at least one of statement
// type, visible route, visible phases or DDL target routine is set.
func finalizeDoneJob(_ Dependencies, job *jobs.Job, start, end time.Time,
        schema *enginepb.TableSchema, dmlStats *enginepb.DmlStats, rows []bqtypes.Row,
        statementType, emulatorRoute string, emulatorPhases map[string]int64,
        ddlTarget *bqtypes.RoutineReference,
        sessionInfo *bqtypes.SessionInfo, r *http.Request,
) {
        job.Status.State = jobs.JobStateDone

        stats := &job.Statistics
        stats.StartTime = millisString(start)
        stats.EndTime = millisString(end)
        stats.TotalBytesProcessed = "0"

        stampJobSessionInfo(job, sessionInfo)
        stampQueryJobDestination(job.JobReference.ProjectID, job, statementType)

        result := &jobs.QueryResult{
                Schema:           schemaFromProto(schema),
                Rows:             rows,
                DmlStats:         dmlStatsFromProto(dmlStats),
                StatementType:    statementType,
                DdlTargetRoutine: ddlTarget,
        }

        // Debug-only fields stay empty for non-loopback callers.
        var (
                visibleRoute  string
                visiblePhases map[string]int64
        )
        if middleware.IsLoopback(r.Context()) {
                visibleRoute, visiblePhases = emulatorRoute, emulatorPhases
        }
        result.EmulatorRoute = visibleRoute
        result.EmulatorPhases = visiblePhases

        hasQueryStats := statementType != "" ||
                visibleRoute != "" ||
                len(visiblePhases) > 0 ||
                ddlTarget != nil
        if hasQueryStats {
                stats.Query = &bqtypes.JobStatistics2{
                        StatementType:    statementType,
                        EmulatorRoute:    visibleRoute,
                        EmulatorPhases:   visiblePhases,
                        DdlTargetRoutine: ddlTarget,
                }
        }

        job.Result = result
}

package handlers

import (
        "errors"
        "io"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// drainSyncStream is the JobInsert counterpart of
// `streamQueryResults`: it consumes the same proto stream, but instead
// of writing an HTTP envelope and signalling `ok=false`, it returns the
// raw stream error so the caller can fold it into the Job's status.
//
// Each message is classified by the first populated field, in this
// order: schema, DML stats, statement type, emulator route, phase
// timings; anything else is treated as a row of cells. For schema, DML
// stats, statement type and route only the first occurrence is kept.
// Phase timings from multiple messages are merged into one map (entries
// with an empty name are skipped). Rows are decoded against whatever
// schema has been seen so far.
//
// Returns, in order: schema, DML stats, rows (never nil on success),
// statement type, emulator route, phase timings, error. On a non-EOF
// stream error every other result is zeroed.
func drainSyncStream(stream enginepb.Query_ExecuteQueryClient) (
        *enginepb.TableSchema, *enginepb.DmlStats, []bqtypes.Row, string, string, map[string]int64, error,
) {
        var (
                schema   *enginepb.TableSchema
                dmlStats *enginepb.DmlStats
                stmtType string
                route    string
                phases   map[string]int64
        )
        rows := []bqtypes.Row{}

        for {
                msg, recvErr := stream.Recv()
                if recvErr != nil {
                        if !errors.Is(recvErr, io.EOF) {
                                return nil, nil, nil, "", "", nil, recvErr
                        }
                        // Clean end of stream: hand back everything collected.
                        return schema, dmlStats, rows, stmtType, route, phases, nil
                }

                switch {
                case msg.GetSchema() != nil:
                        if schema == nil {
                                schema = msg.GetSchema()
                        }
                case msg.GetDmlStats() != nil:
                        if dmlStats == nil {
                                dmlStats = msg.GetDmlStats()
                        }
                case msg.GetStatementType() != "":
                        if stmtType == "" {
                                stmtType = msg.GetStatementType()
                        }
                case msg.GetEmulatorRoute() != "":
                        if route == "" {
                                route = msg.GetEmulatorRoute()
                        }
                case msg.GetPhaseTimings() != nil && len(msg.GetPhaseTimings().GetPhases()) > 0:
                        timings := msg.GetPhaseTimings().GetPhases()
                        if phases == nil {
                                phases = make(map[string]int64, len(timings))
                        }
                        for _, timing := range timings {
                                if name := timing.GetName(); name != "" {
                                        phases[name] = timing.GetDurationUs()
                                }
                        }
                default:
                        rows = append(rows, bqtypes.CellsToRowForSchema(msg.GetCells(), schema))
                }
        }
}

// defaultDatasetID returns the dataset ID carried by an optional
// `defaultDataset` reference, or "" when the reference is nil. Only the
// dataset ID is extracted: the engine wire field carries the dataset ID
// alone, with the project supplied separately via `project_id`.
func defaultDatasetID(ref *bqtypes.DatasetReference) string {
        if ref != nil {
                return ref.DatasetID
        }
        return ""
}

// resolveDefaultDataset chooses the effective default dataset for a
// query/job. A non-empty `defaultDataset` on the request takes
// precedence; otherwise the server-level default (`--dataset`, held in
// deps.DefaultDatasetID) is used. This matches a production BigQuery
// client that configures `default_dataset` once and leaves it off
// individual requests. An empty return value means there is no default,
// so bare table names error as they do in production.
func resolveDefaultDataset(deps Dependencies, ref *bqtypes.DatasetReference) string {
        requested := defaultDatasetID(ref)
        if requested == "" {
                return deps.DefaultDatasetID
        }
        return requested
}

package handlers

import (
        "encoding/json"
        "io"
        "net/http"
        "strconv"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/copy"
        "github.com/vantaboard/bigquery-emulator/gateway/extract"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/load"
)

// runSyncLoadInsert handles a load-job `jobs.insert` synchronously: it
// registers a pending job, runs load.Execute against the engine
// catalog, and writes the finalized job as a 200 JSON response.
//
// Three outcomes are possible, all answered with HTTP 200 and the job
// envelope: the data plane is unavailable (no catalog), the load fails
// (error recorded on the job), or the load succeeds (destination
// metadata persisted and load statistics stamped).
func runSyncLoadInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
        posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
        projectID := r.PathValue("projectId")
        job := newPendingJob(deps, projectID, posted, cfg)
        startedAt := time.Now().UTC()

        if deps.Catalog == nil {
                finalizeDeferredDataPlaneJob(job, cfg, startedAt, "load")
        } else if result, err := load.Execute(r.Context(), deps.Catalog, cfg.Load, projectID); err != nil {
                finalizeFailedDataPlaneJob(job, startedAt, err)
        } else {
                persistLoadTableMetadata(deps, cfg.Load, projectID)
                finalizeSuccessfulLoadJob(job, startedAt, result)
        }
        writeJSON(w, http.StatusOK, job)
}

// runSyncCopyInsert handles a copy-job `jobs.insert` synchronously: it
// registers a pending job, delegates to copy.Execute, and writes the
// finalized job as a 200 JSON response.
//
// Every outcome is answered with HTTP 200 and the job envelope: no
// catalog (data plane unavailable), copy failure (error recorded on the
// job), or success (destination metadata persisted and copy statistics
// stamped).
func runSyncCopyInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
        posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
        projectID := r.PathValue("projectId")
        job := newPendingJob(deps, projectID, posted, cfg)
        startedAt := time.Now().UTC()

        if deps.Catalog == nil {
                finalizeDeferredDataPlaneJob(job, cfg, startedAt, "copy")
        } else if result, err := copy.Execute(
                r.Context(), deps.Catalog, deps.Query, deps.Snapshots, cfg.Copy, projectID,
        ); err != nil {
                finalizeFailedDataPlaneJob(job, startedAt, err)
        } else {
                persistCopyTableMetadata(deps, cfg.Copy, projectID)
                finalizeSuccessfulCopyJob(job, startedAt, result)
        }
        writeJSON(w, http.StatusOK, job)
}

// persistCopyTableMetadata records REST-only destination metadata
// supplied by a copy job (snapshot typing, expiration) in the metadata
// store so tables.get returns what the job provided.
//
// It is a no-op when there is no metadata store, no config or no
// destination table, and also when the operation is a plain copy with
// no expiration time (nothing to record). The destination project
// falls back to projectID when the reference leaves it empty.
func persistCopyTableMetadata(deps Dependencies, cfg *jobs.JobConfigurationCopy, projectID string) {
        if deps.Metadata == nil {
                return
        }
        if cfg == nil || cfg.DestinationTable == nil {
                return
        }

        operation := copy.NormalizeOperationType(cfg.OperationType)
        expiration := strings.TrimSpace(cfg.DestinationExpirationTime)
        if expiration == "" && operation == copy.OperationCopy {
                return
        }

        dest := cfg.DestinationTable
        owner := dest.ProjectID
        if owner == "" {
                owner = projectID
        }

        var patch bqtypes.Table
        if expiration != "" {
                patch.ExpirationTime = bqtypes.MillisTimestamp(expiration)
        }
        // Only snapshot and restore operations override the table type.
        if operation == copy.OperationSnapshot {
                patch.Type = snapshotTableType
        } else if operation == copy.OperationRestore {
                patch.Type = defaultTableType
        }

        deps.Metadata.MergeTable(owner, dest.DatasetID, dest.TableID, patch)
}

// runSyncExtractInsert handles an extract-job `jobs.insert`
// synchronously: it registers a pending job, delegates to
// extract.Execute (table rows exported as CSV/JSON to GCS), and writes
// the finalized job as a 200 JSON response.
//
// Every outcome is answered with HTTP 200 and the job envelope: no
// catalog (data plane unavailable), extract failure (error recorded on
// the job), or success (extract statistics stamped).
func runSyncExtractInsert(deps Dependencies, w http.ResponseWriter, r *http.Request,
        posted *jobs.Job, cfg *jobs.JobConfiguration,
) {
        projectID := r.PathValue("projectId")
        job := newPendingJob(deps, projectID, posted, cfg)
        startedAt := time.Now().UTC()

        if deps.Catalog == nil {
                finalizeDeferredDataPlaneJob(job, cfg, startedAt, "extract")
        } else if result, err := extract.Execute(r.Context(), deps.Catalog, cfg.Extract, projectID); err != nil {
                finalizeFailedDataPlaneJob(job, startedAt, err)
        } else {
                finalizeSuccessfulExtractJob(job, startedAt, result)
        }
        writeJSON(w, http.StatusOK, job)
}

// finalizeSuccessfulCopyJob marks a copy job DONE with no error result,
// stamps StartTime from start and EndTime from the current UTC time,
// and attaches the copy statistics formatted from result.
func finalizeSuccessfulCopyJob(job *jobs.Job, start time.Time, result copy.Result) {
        finishedAt := time.Now().UTC()

        status := &job.Status
        status.State = jobs.JobStateDone
        status.ErrorResult = nil

        stats := &job.Statistics
        stats.StartTime = millisString(start)
        stats.EndTime = millisString(finishedAt)
        stats.Copy = copy.FormatStatistics(result)
}

// finalizeSuccessfulExtractJob marks an extract job DONE with no error
// result, stamps StartTime from start and EndTime from the current UTC
// time, and attaches the extract statistics formatted from result.
func finalizeSuccessfulExtractJob(job *jobs.Job, start time.Time, result extract.Result) {
        finishedAt := time.Now().UTC()

        status := &job.Status
        status.State = jobs.JobStateDone
        status.ErrorResult = nil

        stats := &job.Statistics
        stats.StartTime = millisString(start)
        stats.EndTime = millisString(finishedAt)
        stats.Extract = extract.FormatStatistics(result)
}

// finalizeSuccessfulLoadJob marks a load job DONE with no error result,
// stamps StartTime from start and EndTime from the current UTC time,
// and attaches the load statistics formatted from result.
func finalizeSuccessfulLoadJob(job *jobs.Job, start time.Time, result load.Result) {
        finishedAt := time.Now().UTC()

        status := &job.Status
        status.State = jobs.JobStateDone
        status.ErrorResult = nil

        stats := &job.Statistics
        stats.StartTime = millisString(start)
        stats.EndTime = millisString(finishedAt)
        stats.Load = load.FormatStatistics(result)
}

// persistLoadTableMetadata records REST-only destination metadata
// supplied by a load job (CMEK, clustering, time partitioning) in the
// metadata store so tables.get returns what the job provided.
//
// It is a no-op when there is no metadata store, no config, no
// destination table, or when none of the three metadata sections is
// set. The destination project falls back to projectID when the
// reference leaves it empty.
func persistLoadTableMetadata(deps Dependencies, cfg *jobs.JobConfigurationLoad, projectID string) {
        if deps.Metadata == nil {
                return
        }
        if cfg == nil || cfg.DestinationTable == nil {
                return
        }

        hasOverlay := cfg.DestinationEncryptionConfiguration != nil ||
                cfg.Clustering != nil ||
                cfg.TimePartitioning != nil
        if !hasOverlay {
                return
        }

        dest := cfg.DestinationTable
        owner := dest.ProjectID
        if owner == "" {
                owner = projectID
        }

        patch := bqtypes.Table{
                EncryptionConfiguration: cfg.DestinationEncryptionConfiguration,
                Clustering:              cfg.Clustering,
                TimePartitioning:        cfg.TimePartitioning,
        }
        deps.Metadata.MergeTable(owner, dest.DatasetID, dest.TableID, patch)
}

// finalizeDeferredDataPlaneJob finalizes a load / copy / extract job
// that could not run because no engine catalog is available. The job
// is marked DONE with a not-implemented `errorResult`, timestamps are
// stamped, and zeroed statistics matching kind ("load", "copy" or
// "extract") are attached. Any other kind leaves the per-kind
// statistics untouched.
//
// For load jobs InputFiles reflects the number of source URIs in the
// config; for extract jobs one "0" file count is emitted per
// destination URI (the slice stays nil when there are none).
func finalizeDeferredDataPlaneJob(job *jobs.Job, cfg *jobs.JobConfiguration, start time.Time, kind string) {
        finishedAt := time.Now().UTC()

        job.Status.State = jobs.JobStateDone
        job.Status.ErrorResult = &bqtypes.ErrorProto{
                Reason: reasonNotImplemented,
                Message: "jobs.insert: " + kind + " job data plane is unavailable; " +
                        "load / copy / extract execution requires an engine catalog connection.",
        }

        stats := &job.Statistics
        stats.StartTime = millisString(start)
        stats.EndTime = millisString(finishedAt)

        if kind == "load" {
                sources := 0
                if cfg.Load != nil {
                        sources = len(cfg.Load.SourceURIs)
                }
                stats.Load = &jobs.LoadStatistics{
                        InputFiles:     strconv.Itoa(sources),
                        InputFileBytes: "0",
                        OutputRows:     "0",
                        OutputBytes:    "0",
                        BadRecords:     "0",
                }
                return
        }

        if kind == "copy" {
                stats.Copy = &jobs.CopyStatistics{
                        CopiedRows:         "0",
                        CopiedLogicalBytes: "0",
                }
                return
        }

        if kind == "extract" {
                var fileCounts []string
                if cfg.Extract != nil {
                        if want := len(cfg.Extract.DestinationURIs); want > 0 {
                                fileCounts = make([]string, 0, want)
                                for len(fileCounts) < want {
                                        fileCounts = append(fileCounts, "0")
                                }
                        }
                }
                stats.Extract = &jobs.ExtractStatistics{
                        DestinationURIFileCounts: fileCounts,
                        InputBytes:               "0",
                }
        }
}

// handleJobInsertUploadPost dispatches an upload POST on its
// `uploadType` query parameter: "multipart" runs the one-shot multipart
// load, "resumable" opens a resumable upload session. Any other value
// is rejected with a 400 "invalid" error.
func handleJobInsertUploadPost(deps Dependencies, store *load.UploadStore,
        w http.ResponseWriter, r *http.Request,
) {
        mode := r.URL.Query().Get("uploadType")
        if mode == "multipart" {
                handleMultipartLoadUpload(deps, w, r)
                return
        }
        if mode == "resumable" {
                handleResumableLoadUploadInit(store, w, r)
                return
        }
        writeError(w, http.StatusBadRequest, reasonInvalid,
                "uploadType must be multipart or resumable")
}

// handleMultipartLoadUpload reads the whole request body, splits it
// into job metadata and media via load.ParseMultipartJob (using the
// request's Content-Type), and runs the resulting load job. A body read
// failure or a multipart parse failure is answered with a 400
// "invalid" error.
func handleMultipartLoadUpload(deps Dependencies, w http.ResponseWriter, r *http.Request) {
        raw, readErr := io.ReadAll(r.Body)
        if readErr != nil {
                writeError(w, http.StatusBadRequest, reasonInvalid,
                        "Could not read upload body: "+readErr.Error())
                return
        }

        contentType := r.Header.Get("Content-Type")
        metadata, media, parseErr := load.ParseMultipartJob(raw, contentType)
        if parseErr != nil {
                writeError(w, http.StatusBadRequest, reasonInvalid, parseErr.Error())
                return
        }

        runUploadedLoadJob(deps, w, r, metadata, media)
}

// handleResumableLoadUploadInit opens a resumable upload session.
//
// The request body (the job metadata) is read in full and stored on a
// new session together with the total upload size declared in the
// optional `X-Upload-Content-Length` header. When the header is absent
// or blank the total is recorded as -1 (unknown). A header that is not
// a base-10 integer, or that is negative, is rejected with a 400
// "invalid" error: a byte count cannot be negative, and accepting one
// would store a nonsensical total on the session.
//
// On success the response is an empty 200 whose `Location` header
// points at the session URL the client should PUT chunks to.
func handleResumableLoadUploadInit(store *load.UploadStore, w http.ResponseWriter, r *http.Request) {
        projectID := r.PathValue("projectId")
        body, err := io.ReadAll(r.Body)
        if err != nil {
                writeError(w, http.StatusBadRequest, reasonInvalid,
                        "Could not read upload metadata: "+err.Error())
                return
        }

        // -1 marks "total size not declared".
        var total int64 = -1
        if v := strings.TrimSpace(r.Header.Get("X-Upload-Content-Length")); v != "" {
                declared, perr := strconv.ParseInt(v, 10, 64)
                if perr != nil || declared < 0 {
                        writeError(w, http.StatusBadRequest, reasonInvalid,
                                "invalid X-Upload-Content-Length")
                        return
                }
                total = declared
        }

        uploadID := store.CreateSession(projectID, body, total)
        w.Header().Set("Location", load.AbsoluteSessionLocation(
                requestEmulatorBaseURL(r), projectID, uploadID))
        w.WriteHeader(http.StatusOK)
}

// handleJobInsertUploadPut accepts one PUT of a resumable upload.
//
// The request must carry `uploadType=resumable` and a non-empty
// `upload_id` that names a live session (400 / 400 / 404 otherwise).
// The chunk body is read in full and handed to finalizeResumableChunk
// along with the `Content-Range` header. If the upload is still
// incomplete, a resume-incomplete response reporting the bytes received
// so far is written. Once complete, the session is deleted from the
// store and the load job runs with the session metadata and the
// assembled media.
func handleJobInsertUploadPut(deps Dependencies, store *load.UploadStore,
        w http.ResponseWriter, r *http.Request,
) {
        query := r.URL.Query()
        if query.Get("uploadType") != "resumable" {
                writeError(w, http.StatusBadRequest, reasonInvalid,
                        "PUT upload requires uploadType=resumable")
                return
        }

        uploadID := query.Get("upload_id")
        if uploadID == "" {
                writeError(w, http.StatusBadRequest, reasonInvalid, "upload_id is required")
                return
        }

        session := store.Get(uploadID)
        if session == nil {
                writeError(w, http.StatusNotFound, reasonNotFound, "upload session not found")
                return
        }

        chunk, readErr := io.ReadAll(r.Body)
        if readErr != nil {
                writeError(w, http.StatusBadRequest, reasonInvalid,
                        "Could not read upload chunk: "+readErr.Error())
                return
        }

        contentRange := r.Header.Get("Content-Range")
        media, complete, chunkErr := finalizeResumableChunk(store, uploadID, session, contentRange, chunk)
        if chunkErr != nil {
                writeError(w, http.StatusBadRequest, reasonInvalid, chunkErr.Error())
                return
        }
        if !complete {
                load.WriteResumeIncomplete(w, store.ReceivedBytes(uploadID))
                return
        }

        // The upload is complete: drop the session before running the job.
        store.Delete(uploadID)
        runUploadedLoadJob(deps, w, r, session.Metadata, media)
}

// finalizeResumableChunk applies one PUT of a resumable upload to the
// session and reports whether the upload is now complete.
//
// Cases, by which of Content-Range and body are present:
//   - range and empty body: nothing is stored and the upload is
//     reported as not done;
//   - range and body: delegated to appendResumableRange;
//   - body only: the bytes are written at offset 0 and the upload is
//     treated as complete;
//   - neither: complete unless the session declared a positive total
//     that has not been reached yet.
//
// When done is true, media is the session's accumulated data.
//
// NOTE(review): in the body-only case the session is re-fetched and
// dereferenced without a nil check — presumably safe because
// AppendBytes just succeeded, but confirm against concurrent deletes.
func finalizeResumableChunk(store *load.UploadStore, uploadID string, sess *load.UploadSession,
        contentRange string, body []byte,
) (media []byte, done bool, err error) {
        hasRange := contentRange != ""
        hasBody := len(body) > 0

        if hasRange {
                if !hasBody {
                        return nil, false, nil
                }
                return appendResumableRange(store, uploadID, contentRange, body)
        }

        if hasBody {
                if aerr := store.AppendBytes(uploadID, body, 0); aerr != nil {
                        return nil, false, aerr
                }
                return store.Get(uploadID).Data, true, nil
        }

        received := store.ReceivedBytes(uploadID)
        stillWaiting := sess.Total > 0 && received < sess.Total
        if stillWaiting {
                return nil, false, nil
        }
        return sess.Data, true, nil
}

// appendResumableRange stores a chunk whose position is given by a
// Content-Range header. The header must parse and the body length must
// equal end-start+1; otherwise errInvalidContentRange or
// errContentRangeLength is returned. After the bytes are appended at
// the range start, the upload is reported complete unless the header's
// total is positive and the bytes received so far fall short of it. On
// completion the session's accumulated data is returned.
func appendResumableRange(store *load.UploadStore, uploadID, contentRange string, body []byte,
) ([]byte, bool, error) {
        first, last, size, parsed := load.ParseContentRange(contentRange)
        if !parsed {
                return nil, false, errInvalidContentRange
        }

        expected := last - first + 1
        if expected != int64(len(body)) {
                return nil, false, errContentRangeLength
        }

        if appendErr := store.AppendBytes(uploadID, body, first); appendErr != nil {
                return nil, false, appendErr
        }

        if got := store.ReceivedBytes(uploadID); size > 0 && got < size {
                return nil, false, nil
        }
        return store.Get(uploadID).Data, true, nil
}

// errUpload is a string-backed error type for upload validation
// failures; the string is the error message.
type errUpload string

// Error implements the error interface by returning the message text.
func (u errUpload) Error() string {
        return string(u)
}

// Sentinel errors reported while validating a resumable chunk's
// Content-Range header.
var (
        errInvalidContentRange errUpload = "invalid Content-Range"
        errContentRangeLength  errUpload = "Content-Range length mismatch"
)

// runUploadedLoadJob runs a load job whose source bytes arrived via an
// upload (multipart or resumable) rather than from source URIs.
//
// metadata is the JSON job body; it may be empty, but the decoded job
// must contain `configuration.load` or the request is rejected with a
// 400 "invalid" error (as is unparsable JSON). After that the flow
// mirrors the synchronous load path: a pending job is registered and
// the finalized job is always written as a 200 JSON response, whether
// the data plane is unavailable, the load failed, or it succeeded.
func runUploadedLoadJob(deps Dependencies, w http.ResponseWriter, r *http.Request,
        metadata, media []byte,
) {
        var posted jobs.Job
        if len(metadata) != 0 {
                if decodeErr := json.Unmarshal(metadata, &posted); decodeErr != nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid,
                                "Could not parse upload metadata as JSON: "+decodeErr.Error())
                        return
                }
        }

        cfg := posted.Configuration
        if hasLoad := cfg != nil && cfg.Load != nil; !hasLoad {
                writeError(w, http.StatusBadRequest, reasonInvalid,
                        "upload metadata must include configuration.load")
                return
        }

        projectID := r.PathValue("projectId")
        job := newPendingJob(deps, projectID, &posted, cfg)
        startedAt := time.Now().UTC()

        if deps.Catalog == nil {
                finalizeDeferredDataPlaneJob(job, cfg, startedAt, "load")
        } else if result, err := load.ExecuteFromBytes(r.Context(), deps.Catalog, cfg.Load, projectID, media); err != nil {
                finalizeFailedDataPlaneJob(job, startedAt, err)
        } else {
                persistLoadTableMetadata(deps, cfg.Load, projectID)
                finalizeSuccessfulLoadJob(job, startedAt, result)
        }
        writeJSON(w, http.StatusOK, job)
}

package handlers

import (
        "encoding/json"
        "sync"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// MetadataStore caches REST-only Dataset/Table metadata that the engine
// catalog does not yet persist on the C++ side: `labels`,
// `defaultCollation`, `expirationTime`, `rangePartitioning`,
// `timePartitioning`, `clustering`, plus the smaller bookkeeping fields
// (`friendlyName`, `description`). The handler layer populates the
// store from Insert/Patch/Update bodies and reads it back in Get so
// client libraries observe the values they wrote.
//
// Lifetime: in-memory, per-gateway-process. Survives until the gateway
// restarts. For thirdparty test runs, the `THIRDPARTY_FRESH_VOLUME=1`
// path in `taskfiles/thirdparty.yml` wipes the engine volume on
// bringup, which intentionally aligns with the cache being empty at
// startup.
//
// Persistence is a separate (larger) plan: extending the engine
// `RegisterTable` / `DescribeTable` protos and the on-disk meta
// sidecar so values survive restart. Until then the in-memory store is
// the minimum-viable round-trip and is gated on the gateway speaking
// to the engine; gateway-only (`--engine_binary=""`) modes keep the
// echo posture they had before.
//
// Thread-safety: protected by an RWMutex. Lookups (the hot path for
// list samples) take the read lock; mutations take the write lock.
//
// A nil *MetadataStore is usable: every method checks for a nil
// receiver and degrades to a no-op / "not found" result.
type MetadataStore struct {
        // mu guards both maps below.
        mu sync.RWMutex
        // tables holds per-table metadata keyed by tableKey
        // ("project:dataset.table").
        tables map[string]bqtypes.Table
        // datasets holds per-dataset metadata keyed by datasetKey
        // ("project:dataset").
        datasets map[string]bqtypes.Dataset
}

// NewMetadataStore builds a MetadataStore with both the table and the
// dataset maps initialized and empty, ready for concurrent use.
func NewMetadataStore() *MetadataStore {
        store := new(MetadataStore)
        store.tables = make(map[string]bqtypes.Table)
        store.datasets = make(map[string]bqtypes.Dataset)
        return store
}

// tableKey builds the map key for a table entry in the form
// "project:dataset.table".
func tableKey(projectID, datasetID, tableID string) string {
        qualifiedDataset := projectID + ":" + datasetID
        return qualifiedDataset + "." + tableID
}

// datasetKey builds the map key for a dataset entry in the form
// "project:dataset".
func datasetKey(projectID, datasetID string) string {
        const sep = ":"
        return projectID + sep + datasetID
}

// PutTable stores the round-trippable metadata of t for the given
// table, replacing any previous entry. Only the subset selected by
// stripEngineOwnedTableFields is kept (labels, expirationTime,
// rangePartitioning, clustering, defaultCollation, friendlyName,
// description, view, type, requirePartitionFilter, ...); engine-owned
// fields such as Schema / NumRows come from the engine's DescribeTable
// response on Get. Timestamps are adjusted by bumpTableTimestamps,
// which is given the prior entry and whether one existed. Calling
// PutTable on a nil store is a no-op.
func (s *MetadataStore) PutTable(projectID, datasetID, tableID string, t bqtypes.Table) {
        if s == nil {
                return
        }
        key := tableKey(projectID, datasetID, tableID)
        entry := stripEngineOwnedTableFields(t)

        s.mu.Lock()
        defer s.mu.Unlock()

        previous, found := s.tables[key]
        bumpTableTimestamps(&entry, previous, found)
        s.tables[key] = entry
}

// MergeTable overlays the sparse fields of a PATCH/UPDATE body onto the
// cached entry for the table (or onto an empty entry when none is
// cached) and stores the result. The patch is first reduced to its
// REST-only subset. bumpTableTimestamps is always told a prior entry
// existed, even when the key was absent. Calling MergeTable on a nil
// store is a no-op.
func (s *MetadataStore) MergeTable(projectID, datasetID, tableID string, patch bqtypes.Table) {
        if s == nil {
                return
        }
        key := tableKey(projectID, datasetID, tableID)
        overlay := stripEngineOwnedTableFields(patch)

        s.mu.Lock()
        defer s.mu.Unlock()

        previous := s.tables[key]
        combined := mergeTableMetadataOverlay(previous, overlay)
        bumpTableTimestamps(&combined, previous, true)
        s.tables[key] = combined
}

// GetTable looks up the cached REST-only metadata for a table. The
// boolean reports whether an entry was present; a nil store always
// reports false. The returned value holds only the cached fields, so
// callers must merge it with the engine's DescribeTable response
// themselves to build the full GET shape.
func (s *MetadataStore) GetTable(projectID, datasetID, tableID string) (bqtypes.Table, bool) {
        if s == nil {
                return bqtypes.Table{}, false
        }
        key := tableKey(projectID, datasetID, tableID)

        s.mu.RLock()
        entry, found := s.tables[key]
        s.mu.RUnlock()
        return entry, found
}

// DeleteTable removes the cached entry for the table so that a later
// Insert reusing the same ID does not surface stale metadata. Deleting
// a missing entry, or calling it on a nil store, is a no-op.
func (s *MetadataStore) DeleteTable(projectID, datasetID, tableID string) {
        if s == nil {
                return
        }
        key := tableKey(projectID, datasetID, tableID)

        s.mu.Lock()
        defer s.mu.Unlock()
        delete(s.tables, key)
}

// RestMetadataJSON returns the cached REST-only metadata of a dataset
// serialized as JSON. It returns "" when the store is nil, when no
// entry is cached for the dataset, or when marshalling fails. The read
// lock is released before the entry is serialized.
func (s *MetadataStore) RestMetadataJSON(projectID, datasetID string) string {
        if s == nil {
                return ""
        }
        key := datasetKey(projectID, datasetID)

        s.mu.RLock()
        cached, found := s.datasets[key]
        s.mu.RUnlock()
        if !found {
                return ""
        }

        if encoded, err := json.Marshal(stripEngineOwnedDatasetFields(cached)); err == nil {
                return string(encoded)
        }
        return ""
}

// RestoreDatasetRestMetadataJSON re-installs gateway REST metadata for
// a dataset from an engine tombstone snapshot
// (`UndeleteDatasetResponse.rest_metadata_json`). The JSON is decoded
// into a Dataset and stored via PutDataset. A nil store, an empty
// string, or JSON that fails to decode leaves the store unchanged.
func (s *MetadataStore) RestoreDatasetRestMetadataJSON(
        projectID, datasetID, restMetadataJSON string,
) {
        if s == nil {
                return
        }
        if restMetadataJSON == "" {
                return
        }

        restored := bqtypes.Dataset{}
        if json.Unmarshal([]byte(restMetadataJSON), &restored) != nil {
                return
        }
        s.PutDataset(projectID, datasetID, restored)
}

// PutDataset stores the round-trippable metadata of ds for the given
// dataset, replacing any previous entry. Only the subset selected by
// stripEngineOwnedDatasetFields is kept, and timestamps are adjusted by
// bumpDatasetTimestamps, which is given the prior entry and whether one
// existed. Calling PutDataset on a nil store is a no-op.
func (s *MetadataStore) PutDataset(projectID, datasetID string, ds bqtypes.Dataset) {
        if s == nil {
                return
        }
        key := datasetKey(projectID, datasetID)
        entry := stripEngineOwnedDatasetFields(ds)

        s.mu.Lock()
        defer s.mu.Unlock()

        previous, found := s.datasets[key]
        bumpDatasetTimestamps(&entry, previous, found)
        s.datasets[key] = entry
}

// MergeDataset overlays the sparse fields of a PATCH/UPDATE body onto
// the cached entry for the dataset (or onto an empty entry when none is
// cached) and stores the result. The patch is first reduced to its
// REST-only subset. bumpDatasetTimestamps is always told a prior entry
// existed, even when the key was absent. Calling MergeDataset on a nil
// store is a no-op.
func (s *MetadataStore) MergeDataset(projectID, datasetID string, patch bqtypes.Dataset) {
        if s == nil {
                return
        }
        key := datasetKey(projectID, datasetID)
        overlay := stripEngineOwnedDatasetFields(patch)

        s.mu.Lock()
        defer s.mu.Unlock()

        previous := s.datasets[key]
        combined := mergeDatasetMetadataOverlay(previous, overlay)
        bumpDatasetTimestamps(&combined, previous, true)
        s.datasets[key] = combined
}

// GetDataset looks up the cached REST-only metadata for a dataset. The
// boolean reports whether an entry was present; a nil store always
// reports false.
func (s *MetadataStore) GetDataset(projectID, datasetID string) (bqtypes.Dataset, bool) {
        if s == nil {
                return bqtypes.Dataset{}, false
        }
        key := datasetKey(projectID, datasetID)

        s.mu.RLock()
        entry, found := s.datasets[key]
        s.mu.RUnlock()
        return entry, found
}

// DeleteDataset removes the cached entry for the dataset. It does NOT
// cascade to the dataset's table entries: DatasetDelete with
// `deleteContents=true` handles those explicitly, since the handler
// knows which tables the dataset holds. Deleting a missing entry, or
// calling it on a nil store, is a no-op.
func (s *MetadataStore) DeleteDataset(projectID, datasetID string) {
        if s == nil {
                return
        }
        key := datasetKey(projectID, datasetID)

        s.mu.Lock()
        defer s.mu.Unlock()
        delete(s.datasets, key)
}

// DeleteTablesInDataset drops every cached table entry whose key starts
// with "project:dataset." and has at least one further character.
// DatasetDelete calls it when the caller sets `deleteContents=true`, so
// that recreating a dataset under the same ID does not inherit stale
// table metadata. Calling it on a nil store is a no-op.
func (s *MetadataStore) DeleteTablesInDataset(projectID, datasetID string) {
        if s == nil {
                return
        }
        prefix := projectID + ":" + datasetID + "."
        n := len(prefix)

        s.mu.Lock()
        defer s.mu.Unlock()
        for key := range s.tables {
                // A key must be strictly longer than the prefix to match.
                if len(key) <= n {
                        continue
                }
                if key[:n] == prefix {
                        delete(s.tables, key)
                }
        }
}

// stripEngineOwnedTableFields returns a copy of t reduced to the fields
// the metadata store is allowed to cache. Everything not copied below
// is dropped, so a PATCH that echoes a prior GET cannot store stale
// engine-owned state.
//
// Besides the REST-only descriptive fields, the copy keeps
// CreationTime and LastModifiedTime as supplied, and for Schema it
// keeps only what bqtypes.ExtractSchemaPolicyOverlay extracts from
// t.Schema rather than the schema itself. The Get handler re-merges the
// engine-side schema on every read.
func stripEngineOwnedTableFields(t bqtypes.Table) bqtypes.Table {
        var kept bqtypes.Table

        // Descriptive, user-supplied metadata.
        kept.FriendlyName = t.FriendlyName
        kept.Description = t.Description
        kept.Labels = t.Labels
        kept.ExpirationTime = t.ExpirationTime

        // Partitioning, clustering and collation settings.
        kept.RangePartitioning = t.RangePartitioning
        kept.TimePartitioning = t.TimePartitioning
        kept.Clustering = t.Clustering
        kept.DefaultCollation = t.DefaultCollation
        kept.DefaultCollationSet = t.DefaultCollationSet
        kept.DefaultRoundingMode = t.DefaultRoundingMode
        kept.CaseInsensitive = t.CaseInsensitive

        kept.ResourceTags = t.ResourceTags
        kept.TableConstraints = t.TableConstraints
        kept.Replicas = t.Replicas

        // Timestamps are carried through as given.
        kept.CreationTime = t.CreationTime
        kept.LastModifiedTime = t.LastModifiedTime

        // Table kind and its definition.
        kept.Type = t.Type
        kept.View = t.View
        kept.MaterializedView = t.MaterializedView
        kept.RequirePartitionFilter = t.RequirePartitionFilter
        kept.ExternalDataConfiguration = t.ExternalDataConfiguration
        kept.EncryptionConfiguration = t.EncryptionConfiguration

        // Only the policy overlay of the schema is cached.
        kept.Schema = bqtypes.ExtractSchemaPolicyOverlay(t.Schema)

        return kept
}

// stripEngineOwnedDatasetFields is the dataset counterpart of
// stripEngineOwnedTableFields: it returns a fresh Dataset holding only
// the fields assigned below (including CreationTime and
// LastModifiedTime) and leaves every other field at its zero value.
func stripEngineOwnedDatasetFields(ds bqtypes.Dataset) bqtypes.Dataset {
        var kept bqtypes.Dataset

        // Descriptive metadata and access.
        kept.FriendlyName = ds.FriendlyName
        kept.Description = ds.Description
        kept.Location = ds.Location
        kept.Access = ds.Access
        kept.Labels = ds.Labels
        kept.ResourceTags = ds.ResourceTags
        kept.Replicas = ds.Replicas

        // Dataset-level defaults.
        kept.DefaultTableExpirationMs = ds.DefaultTableExpirationMs
        kept.DefaultPartitionExpirationMs = ds.DefaultPartitionExpirationMs
        kept.DefaultCollation = ds.DefaultCollation
        kept.DefaultCollationSet = ds.DefaultCollationSet
        kept.DefaultRoundingMode = ds.DefaultRoundingMode
        kept.MaxTimeTravelHours = ds.MaxTimeTravelHours
        kept.IsCaseInsensitive = ds.IsCaseInsensitive

        // Bookkeeping timestamps.
        kept.CreationTime = ds.CreationTime
        kept.LastModifiedTime = ds.LastModifiedTime

        return kept
}

// applyTableMetadataOverlay layers the cached overlay onto the
// engine-derived base table and returns the result. A field is taken
// from overlay only when it is set there (non-empty string, non-nil
// value, or DefaultCollationSet true); otherwise the base value is
// kept. The remaining fields are handled by
// overlayTableExtendedFields and overlayTableDefinitionFields.
func applyTableMetadataOverlay(base bqtypes.Table, overlay bqtypes.Table) bqtypes.Table {
        if v := overlay.FriendlyName; v != "" {
                base.FriendlyName = v
        }
        if v := overlay.Description; v != "" {
                base.Description = v
        }
        if v := overlay.Labels; v != nil {
                base.Labels = v
        }
        if v := overlay.ExpirationTime; v != "" {
                base.ExpirationTime = v
        }
        if v := overlay.RangePartitioning; v != nil {
                base.RangePartitioning = v
        }
        if v := overlay.TimePartitioning; v != nil {
                base.TimePartitioning = v
        }
        if v := overlay.Clustering; v != nil {
                base.Clustering = v
        }
        // The explicit flag lets an empty collation override the base.
        if overlay.DefaultCollationSet {
                base.DefaultCollationSet = true
                base.DefaultCollation = overlay.DefaultCollation
        }

        result := &base
        overlayTableExtendedFields(result, overlay)
        overlayTableDefinitionFields(result, overlay)
        return *result
}

// mergeTableMetadataOverlay folds a sparse patch into a cached table
// entry and returns the result. Empty strings and nil values in patch
// count as "not provided", so a PATCH body may carry only the fields
// being changed. Labels get special treatment: when
// patch.LabelsPatchPresent reports true they are combined through
// bqtypes.ApplyLabelsPatch; otherwise a non-nil patch.Labels replaces
// the cached map wholesale.
func mergeTableMetadataOverlay(base, patch bqtypes.Table) bqtypes.Table {
        if v := patch.FriendlyName; v != "" {
                base.FriendlyName = v
        }
        if v := patch.Description; v != "" {
                base.Description = v
        }

        switch {
        case patch.LabelsPatchPresent():
                base.Labels = bqtypes.ApplyLabelsPatch(
                        base.Labels, true, patch.Labels, patch.LabelsToDelete(),
                )
        case patch.Labels != nil:
                base.Labels = patch.Labels
        }

        if v := patch.ExpirationTime; v != "" {
                base.ExpirationTime = v
        }
        if v := patch.RangePartitioning; v != nil {
                base.RangePartitioning = v
        }
        if v := patch.TimePartitioning; v != nil {
                base.TimePartitioning = v
        }
        if v := patch.Clustering; v != nil {
                base.Clustering = v
        }
        // The explicit flag lets a patch set an empty collation.
        if patch.DefaultCollationSet {
                base.DefaultCollationSet = true
                base.DefaultCollation = patch.DefaultCollation
        }

        result := &base
        overlayTableExtendedFields(result, patch)
        overlayTableDefinitionFields(result, patch)
        return *result
}

// overlayTableExtendedFields copies the rounding mode, case
// sensitivity, resource tags, constraints, replicas and the two
// timestamps from src into base, skipping any field that is unset
// (empty string or nil) in src.
func overlayTableExtendedFields(base *bqtypes.Table, src bqtypes.Table) {
        if v := src.DefaultRoundingMode; v != "" {
                base.DefaultRoundingMode = v
        }
        if v := src.CaseInsensitive; v != nil {
                base.CaseInsensitive = v
        }
        if v := src.ResourceTags; v != nil {
                base.ResourceTags = v
        }
        if v := src.TableConstraints; v != nil {
                base.TableConstraints = v
        }
        if v := src.Replicas; v != nil {
                base.Replicas = v
        }
        if v := src.CreationTime; v != "" {
                base.CreationTime = v
        }
        if v := src.LastModifiedTime; v != "" {
                base.LastModifiedTime = v
        }
}

// overlayTableDefinitionFields copies the table-definition fields
// (type, view, materialized view, partition-filter requirement,
// external data and encryption configuration) from src into base,
// skipping any that are unset in src. A non-nil src.Schema is not
// copied directly; it is combined with base.Schema through
// bqtypes.MergeSchemaPolicyTags.
func overlayTableDefinitionFields(base *bqtypes.Table, src bqtypes.Table) {
        if v := src.Type; v != "" {
                base.Type = v
        }
        if v := src.View; v != nil {
                base.View = v
        }
        if v := src.MaterializedView; v != nil {
                base.MaterializedView = v
        }
        if v := src.RequirePartitionFilter; v != nil {
                base.RequirePartitionFilter = v
        }
        if v := src.ExternalDataConfiguration; v != nil {
                base.ExternalDataConfiguration = v
        }
        if v := src.EncryptionConfiguration; v != nil {
                base.EncryptionConfiguration = v
        }
        if src.Schema == nil {
                return
        }
        base.Schema = bqtypes.MergeSchemaPolicyTags(base.Schema, src.Schema)
}

// applyDatasetMetadataOverlay is the dataset counterpart of
// applyTableMetadataOverlay: each field that is set in overlay
// (non-empty string, non-nil value, or DefaultCollationSet true)
// replaces the matching field of base; unset fields leave base
// untouched. The merged dataset is returned.
func applyDatasetMetadataOverlay(base bqtypes.Dataset, overlay bqtypes.Dataset) bqtypes.Dataset {
        if v := overlay.FriendlyName; v != "" {
                base.FriendlyName = v
        }
        if v := overlay.Description; v != "" {
                base.Description = v
        }
        if v := overlay.Location; v != "" {
                base.Location = v
        }
        if v := overlay.Access; v != nil {
                base.Access = v
        }
        if v := overlay.Labels; v != nil {
                base.Labels = v
        }
        if v := overlay.DefaultTableExpirationMs; v != "" {
                base.DefaultTableExpirationMs = v
        }
        if v := overlay.DefaultPartitionExpirationMs; v != "" {
                base.DefaultPartitionExpirationMs = v
        }
        // The explicit flag lets an empty collation override the base.
        if overlay.DefaultCollationSet {
                base.DefaultCollationSet = true
                base.DefaultCollation = overlay.DefaultCollation
        }
        if v := overlay.DefaultRoundingMode; v != "" {
                base.DefaultRoundingMode = v
        }
        if v := overlay.MaxTimeTravelHours; v != "" {
                base.MaxTimeTravelHours = v
        }
        if v := overlay.IsCaseInsensitive; v != nil {
                base.IsCaseInsensitive = v
        }
        if v := overlay.ResourceTags; v != nil {
                base.ResourceTags = v
        }
        if v := overlay.Replicas; v != nil {
                base.Replicas = v
        }
        if v := overlay.CreationTime; v != "" {
                base.CreationTime = v
        }
        if v := overlay.LastModifiedTime; v != "" {
                base.LastModifiedTime = v
        }
        return base
}

// mergeDatasetMetadataOverlay folds a sparse patch into a cached
// dataset entry and returns the result. Fields left empty or nil in
// patch are treated as "not provided". Labels are combined through
// bqtypes.ApplyLabelsPatch when patch.LabelsPatchPresent reports
// true; otherwise a non-nil patch.Labels replaces the cached map.
// CreationTime and LastModifiedTime are never taken from patch here.
func mergeDatasetMetadataOverlay(base, patch bqtypes.Dataset) bqtypes.Dataset {
        if v := patch.FriendlyName; v != "" {
                base.FriendlyName = v
        }
        if v := patch.Description; v != "" {
                base.Description = v
        }
        if v := patch.Location; v != "" {
                base.Location = v
        }
        if v := patch.Access; v != nil {
                base.Access = v
        }

        switch {
        case patch.LabelsPatchPresent():
                base.Labels = bqtypes.ApplyLabelsPatch(
                        base.Labels, true, patch.Labels, patch.LabelsToDelete(),
                )
        case patch.Labels != nil:
                base.Labels = patch.Labels
        }

        if v := patch.DefaultTableExpirationMs; v != "" {
                base.DefaultTableExpirationMs = v
        }
        if v := patch.DefaultPartitionExpirationMs; v != "" {
                base.DefaultPartitionExpirationMs = v
        }
        // The explicit flag lets a patch set an empty collation.
        if patch.DefaultCollationSet {
                base.DefaultCollationSet = true
                base.DefaultCollation = patch.DefaultCollation
        }
        if v := patch.DefaultRoundingMode; v != "" {
                base.DefaultRoundingMode = v
        }
        if v := patch.MaxTimeTravelHours; v != "" {
                base.MaxTimeTravelHours = v
        }
        if v := patch.IsCaseInsensitive; v != nil {
                base.IsCaseInsensitive = v
        }
        if v := patch.ResourceTags; v != nil {
                base.ResourceTags = v
        }
        if v := patch.Replicas; v != nil {
                base.Replicas = v
        }
        return base
}

// bumpDatasetTimestamps stamps the bookkeeping times on stored before
// it is written to the cache. When hadExisting is true and the
// existing entry has a CreationTime, that value is carried over;
// otherwise an empty CreationTime is filled with nowMillis().
// LastModifiedTime is always overwritten with nowMillis().
func bumpDatasetTimestamps(stored *bqtypes.Dataset, existing bqtypes.Dataset, hadExisting bool) {
        switch {
        case hadExisting && existing.CreationTime != "":
                stored.CreationTime = existing.CreationTime
        case stored.CreationTime == "":
                stored.CreationTime = nowMillis()
        }
        stored.LastModifiedTime = nowMillis()
}

// bumpTableTimestamps stamps the bookkeeping times on stored before
// it is written to the cache. When hadExisting is true and the
// existing entry has a CreationTime, that value is carried over;
// otherwise an empty CreationTime is filled with nowMillis().
// LastModifiedTime is always overwritten with nowMillis().
func bumpTableTimestamps(stored *bqtypes.Table, existing bqtypes.Table, hadExisting bool) {
        switch {
        case hadExisting && existing.CreationTime != "":
                stored.CreationTime = existing.CreationTime
        case stored.CreationTime == "":
                stored.CreationTime = nowMillis()
        }
        stored.LastModifiedTime = nowMillis()
}

package handlers

import (
        "crypto/rand"
        "encoding/hex"
        "encoding/json"
        "io"
        "net/http"
        "sort"
        "strings"
        "sync"
        "time"
)

// BigQuery Migration v2alpha REST shell.
//
// The upstream BigQuery Migration API runs at
// `https://bigquerymigration.googleapis.com/v2alpha/...` (and the
// v2 alias at the same host). Client libraries
// (cloud.google.com/go/bigquery/migration/apiv2alpha,
// google-cloud-bigquery-migration for Python/Node/Java) read
// `BIGQUERY_MIGRATION_EMULATOR_HOST` and fall back to
// `BIGQUERY_EMULATOR_HOST` so this gateway can serve both surfaces
// from the same listener.
//
// This shell keeps workflow metadata in an in-process sync.Map store
// (no AST translator, no LRO store, no subtask catalog). Create
// returns a DRAFT workflow; :start transitions it to RUNNING so
// client startup probes get structurally-valid responses.
//
// Routes registered (for both `v2alpha` and `v2`):
//   GET    /{ver}/projects/{projectId}/locations/{location}/workflows
//   POST   /{ver}/projects/{projectId}/locations/{location}/workflows
//   GET    /{ver}/projects/{projectId}/locations/{location}/workflows/{workflowId}
//   DELETE /{ver}/projects/{projectId}/locations/{location}/workflows/{workflowId}
//   POST   /{ver}/projects/{projectId}/locations/{location}/workflows/{workflowId}:start
//          (dispatched on trailing :start via MigrationWorkflowCustomMethodPOST,
//          because net/http's mux can't match `{workflowId}:start` directly.)

// Workflow states used by this shell. Create stores new workflows as
// DRAFT; the :start custom method moves a DRAFT workflow to RUNNING.
const (
        migrationWorkflowStateDraft   = "DRAFT"
        migrationWorkflowStateRunning = "RUNNING"
)

// migrationWorkflowStore is the process-wide, in-memory workflow
// table shared by every migration handler in this file.
var migrationWorkflowStore sync.Map // canonical name -> *migrationWorkflowResource

// migrationWorkflowResource is the JSON shape this shell stores and
// returns for a migration workflow. Every field except Name is
// omitted from the encoded output when empty.
type migrationWorkflowResource struct {
        // Name is the canonical resource name,
        // "projects/{p}/locations/{l}/workflows/{id}".
        Name           string `json:"name"`
        // DisplayName is copied from the create request body.
        DisplayName    string `json:"displayName,omitempty"`
        // State is one of the migrationWorkflowState* constants.
        State          string `json:"state,omitempty"`
        // CreateTime and LastUpdateTime hold migrationWorkflowNow()
        // values (RFC 3339 with nanoseconds, UTC).
        CreateTime     string `json:"createTime,omitempty"`
        LastUpdateTime string `json:"lastUpdateTime,omitempty"`
}

func migrationWorkflowParent(r *http.Request) string {
        return "projects/" + r.PathValue("projectId") +
                "/locations/" + r.PathValue("location")
}

// migrationWorkflowNow returns the current UTC time formatted as
// RFC 3339 with nanosecond precision.
func migrationWorkflowNow() string {
        utcNow := time.Now().UTC()
        return utcNow.Format(time.RFC3339Nano)
}

// migrationWorkflowMintID returns a fresh workflow ID: 8 bytes read
// from crypto/rand, rendered as 16 lowercase hex characters.
func migrationWorkflowMintID() string {
        raw := make([]byte, 8)
        // The read error is deliberately discarded; on failure the ID is
        // simply the hex form of whatever bytes ended up in the buffer.
        _, _ = rand.Read(raw)
        return hex.EncodeToString(raw)
}

// migrationWorkflowByName fetches the workflow stored under its
// canonical name. It reports false when the key is absent, or when
// the stored value is not a non-nil *migrationWorkflowResource.
func migrationWorkflowByName(name string) (*migrationWorkflowResource, bool) {
        raw, found := migrationWorkflowStore.Load(name)
        if !found {
                return nil, false
        }
        if wf, isWorkflow := raw.(*migrationWorkflowResource); isWorkflow && wf != nil {
                return wf, true
        }
        return nil, false
}

// MigrationWorkflowList implements `migration.workflows.list`. It
// scans the in-memory store for workflows whose name sits under the
// request's parent, sorts them by name, and returns them under the
// "migrationWorkflows" key (an empty array when nothing matches).
func MigrationWorkflowList(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                namePrefix := migrationWorkflowParent(r) + "/workflows/"

                var matched []migrationWorkflowResource
                migrationWorkflowStore.Range(func(k, v any) bool {
                        if name, _ := k.(string); strings.HasPrefix(name, namePrefix) {
                                if wf, _ := v.(*migrationWorkflowResource); wf != nil {
                                        matched = append(matched, *wf)
                                }
                        }
                        // Always keep iterating; the whole store is scanned.
                        return true
                })
                sort.Slice(matched, func(a, b int) bool {
                        return matched[a].Name < matched[b].Name
                })

                // A non-nil slice, so an empty result is emitted as a list.
                payload := make([]any, 0, len(matched))
                for _, wf := range matched {
                        payload = append(payload, wf)
                }
                writeJSON(w, http.StatusOK, map[string]any{
                        "migrationWorkflows": payload,
                })
        }
}

// MigrationWorkflowCreate implements `migration.workflows.create`.
// The body is optional: when it is non-blank it must be valid JSON,
// and only its displayName is carried over. The new workflow gets a
// freshly minted ID under the request's parent, starts in the DRAFT
// state, is stored, and is echoed back with 200 OK.
func MigrationWorkflowCreate(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                raw, readErr := io.ReadAll(r.Body)
                if readErr != nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid, "invalid body")
                        return
                }
                _ = r.Body.Close()

                var requested migrationWorkflowResource
                if strings.TrimSpace(string(raw)) != "" {
                        if decodeErr := json.Unmarshal(raw, &requested); decodeErr != nil {
                                writeError(w, http.StatusBadRequest, reasonInvalid,
                                        "invalid json: "+decodeErr.Error())
                                return
                        }
                }

                workflowID := migrationWorkflowMintID()
                stamp := migrationWorkflowNow()
                created := &migrationWorkflowResource{
                        Name:           migrationWorkflowParent(r) + "/workflows/" + workflowID,
                        DisplayName:    requested.DisplayName,
                        State:          migrationWorkflowStateDraft,
                        CreateTime:     stamp,
                        LastUpdateTime: stamp,
                }
                migrationWorkflowStore.Store(created.Name, created)
                writeJSON(w, http.StatusOK, *created)
        }
}

// MigrationWorkflowGet implements `migration.workflows.get`. It
// returns the stored workflow as JSON, or a 404 error envelope naming
// the resource when no such workflow exists.
func MigrationWorkflowGet(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                name := migrationWorkflowName(r)
                if wf, found := migrationWorkflowByName(name); found {
                        writeJSON(w, http.StatusOK, *wf)
                        return
                }
                writeError(w, http.StatusNotFound, reasonNotFound,
                        "Not found: MigrationWorkflow "+name)
        }
}

// MigrationWorkflowDelete implements `migration.workflows.delete`.
// It removes the workflow named by the path and answers 200 with an
// empty JSON object, or 404 when no such workflow is stored.
//
// The existence check and the removal happen in one atomic
// LoadAndDelete call, so of several concurrent DELETEs for the same
// workflow exactly one succeeds and the rest see 404.
func MigrationWorkflowDelete(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                name := migrationWorkflowName(r)
                if _, existed := migrationWorkflowStore.LoadAndDelete(name); !existed {
                        writeError(w, http.StatusNotFound, reasonNotFound,
                                "Not found: MigrationWorkflow "+name)
                        return
                }
                writeJSON(w, http.StatusOK, struct{}{})
        }
}

// MigrationWorkflowCustomMethodPOST dispatches the AIP-136 ":start"
// custom method that hangs off a workflow resource.
//
// The operation is whatever follows the colon in the {workflowId}
// path capture:
//
//   - "start": 404 if the workflow is unknown; a DRAFT workflow moves
//     to RUNNING, a RUNNING workflow is left alone, and any other
//     state yields 400. Success returns 200 with an empty JSON object.
//   - no operation: 405, because a plain POST on a workflow resource
//     is not allowed.
//   - anything else: 404 for the unknown custom method.
//
// Stored workflows are treated as immutable: the transition builds an
// updated copy and swaps it in, rather than writing through the
// shared pointer that concurrent Get/List handlers may be reading.
func MigrationWorkflowCustomMethodPOST(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                _, op := splitColonOp(r.PathValue("workflowId"))
                switch op {
                case "start":
                        name := migrationWorkflowName(r)
                        wf, ok := migrationWorkflowByName(name)
                        if !ok {
                                writeError(w, http.StatusNotFound, reasonNotFound,
                                        "Not found: MigrationWorkflow "+name)
                                return
                        }
                        switch wf.State {
                        case migrationWorkflowStateDraft:
                                started := *wf
                                started.State = migrationWorkflowStateRunning
                                started.LastUpdateTime = migrationWorkflowNow()
                                // Only replace the exact entry that was loaded. If
                                // the swap loses, a concurrent request has already
                                // replaced or deleted the workflow; this request is
                                // then treated as having run first, and a deleted
                                // workflow is not resurrected.
                                migrationWorkflowStore.CompareAndSwap(name, wf, &started)
                        case migrationWorkflowStateRunning:
                                // no-op: starting a running workflow is idempotent.
                        default:
                                writeError(w, http.StatusBadRequest, reasonFailedPrecondition,
                                        "MigrationWorkflow "+name+" is not in DRAFT or RUNNING state")
                                return
                        }
                        writeJSON(w, http.StatusOK, struct{}{})
                case "":
                        writeError(w, http.StatusMethodNotAllowed, reasonInvalid,
                                "POST is not allowed on a workflow resource. "+
                                        "Use POST .../workflows to create or :start to start.")
                default:
                        writeError(w, http.StatusNotFound, reasonNotFound,
                                "Unknown migration workflow custom method ':"+op+"'.")
                }
        }
}

// migrationWorkflowName rebuilds the canonical workflow resource name
// from the request's path captures so error envelopes match upstream
// error text. Anything after a colon in {workflowId} is split off by
// splitColonOp, and the remaining ID is trimmed of surrounding
// whitespace.
func migrationWorkflowName(r *http.Request) string {
        workflowID, _ := splitColonOp(r.PathValue("workflowId"))
        workflowID = strings.TrimSpace(workflowID)

        project := r.PathValue("projectId")
        location := r.PathValue("location")
        return "projects/" + project + "/locations/" + location + "/workflows/" + workflowID
}

package handlers

import (
        "context"
        "net/http"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/models"
)

// modelListKind is the `kind` value ModelList stamps on every
// models.list response. See
// docs/bigquery/docs/reference/rest/v2/models/list.md.
const modelListKind = "bigquery#listModelsResponse"

// modelStore returns deps.Models, first installing a fresh store on
// deps when none is configured. The write is not synchronized, so
// callers must not invoke this concurrently on the same Dependencies
// while Models is still nil.
func modelStore(deps *Dependencies) *models.Store {
        if store := deps.Models; store != nil {
                return store
        }
        deps.Models = models.NewStore()
        return deps.Models
}

func modelIDFromPath(r *http.Request) (projectID, datasetID, modelID string) {
        return r.PathValue("projectId"), r.PathValue("datasetId"), r.PathValue("modelId")
}

// modelListEntry trims a model down to the summary used in list
// responses: its reference, type, the two timestamps and labels. All
// other fields are left at their zero value.
func modelListEntry(m bqtypes.Model) bqtypes.Model {
        var summary bqtypes.Model
        summary.ModelReference = m.ModelReference
        summary.ModelType = m.ModelType
        summary.CreationTime = m.CreationTime
        summary.LastModifiedTime = m.LastModifiedTime
        summary.Labels = m.Labels
        return summary
}

// ModelList implements `bigquery.models.list`. It lists the models of
// the dataset named in the path (optionally narrowed by the `filter`
// query parameter), reduces each to its list-entry summary, and
// returns them under "models" together with the list `kind`.
//
// When `maxResults` parses as a non-negative integer smaller than the
// number of matches, the page is truncated to that many entries and
// `nextPageToken` is set to the same number.
// NOTE(review): no `pageToken` query parameter is read here, so a
// client following nextPageToken gets the first page again — confirm
// whether pagination is meant to be honored.
//
// The model store is resolved once, when the handler is built, rather
// than inside the request closure: the lazy default in modelStore
// writes to deps, and doing that per request would be an
// unsynchronized write shared by concurrent requests.
func ModelList(deps Dependencies) http.HandlerFunc {
        store := modelStore(&deps)
        return func(w http.ResponseWriter, r *http.Request) {
                projectID := r.PathValue("projectId")
                datasetID := r.PathValue("datasetId")
                query := r.URL.Query()
                all := store.List(projectID, datasetID, query.Get("filter"))
                items := make([]bqtypes.Model, 0, len(all))
                for _, m := range all {
                        items = append(items, modelListEntry(m))
                }
                resp := map[string]any{
                        resourceKeyKind: modelListKind,
                        "models":        items,
                }
                if maxResults := query.Get("maxResults"); maxResults != "" {
                        // An unparseable or out-of-range maxResults is ignored and
                        // the full list is returned.
                        if n, err := strconv.Atoi(maxResults); err == nil && n >= 0 && n < len(items) {
                                resp["models"] = items[:n]
                                resp["nextPageToken"] = strconv.Itoa(n)
                        }
                }
                writeJSON(w, http.StatusOK, resp)
        }
}

// ModelGet implements `bigquery.models.get`. It returns the stored
// model as JSON, or a 404 error envelope naming
// "<project>:<dataset>.<model>" when the store has no such model.
//
// The model store is resolved once, when the handler is built, rather
// than inside the request closure: the lazy default in modelStore
// writes to deps, and doing that per request would be an
// unsynchronized write shared by concurrent requests.
func ModelGet(deps Dependencies) http.HandlerFunc {
        store := modelStore(&deps)
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, modelID := modelIDFromPath(r)
                m, ok := store.Get(projectID, datasetID, modelID)
                if !ok {
                        writeError(w, http.StatusNotFound, reasonNotFound,
                                "Not found: Model "+projectID+":"+datasetID+"."+modelID)
                        return
                }
                writeJSON(w, http.StatusOK, m)
        }
}

// ModelPatch is the placeholder for `bigquery.models.patch`: every
// request is handed straight to NotImplemented. deps is accepted for
// signature parity with the other model handlers and is not used.
func ModelPatch(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                NotImplemented(w, r)
        }
}

// ModelDelete implements `bigquery.models.delete`. A successful
// delete answers 200 with no body; when the store reports that the
// model did not exist, a 404 error envelope naming
// "<project>:<dataset>.<model>" is returned instead.
//
// The model store is resolved once, when the handler is built, rather
// than inside the request closure: the lazy default in modelStore
// writes to deps, and doing that per request would be an
// unsynchronized write shared by concurrent requests.
func ModelDelete(deps Dependencies) http.HandlerFunc {
        store := modelStore(&deps)
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, modelID := modelIDFromPath(r)
                if !store.Delete(projectID, datasetID, modelID) {
                        writeError(w, http.StatusNotFound, reasonNotFound,
                                "Not found: Model "+projectID+":"+datasetID+"."+modelID)
                        return
                }
                w.WriteHeader(http.StatusOK)
        }
}

// persistModelFromDDL records the model described by a CREATE MODEL
// statement in the store attached to deps (created on demand by
// modelStore) and returns whatever reference models.RegisterFromDDL
// yields. The context argument is currently unused.
func persistModelFromDDL(
        _ context.Context,
        deps *Dependencies,
        projectID, defaultDatasetID, sql string,
) *bqtypes.ModelReference {
        store := modelStore(deps)
        return models.RegisterFromDDL(store, projectID, defaultDatasetID, sql)
}

// isCreateModelSQL reports whether sql begins with a CREATE MODEL
// statement, i.e. the keyword sequence CREATE [OR REPLACE] MODEL.
//
// Keywords are matched case-insensitively and may be separated by any
// run of whitespace (spaces, tabs, newlines), so statements formatted
// across several lines are recognized too. Only the start of the
// MODEL token is checked, which keeps forms such as "MODEL`ds.m`" and
// "CREATE MODEL IF NOT EXISTS ..." matching. Leading SQL comments are
// not skipped.
func isCreateModelSQL(sql string) bool {
        words := strings.Fields(sql)
        if len(words) < 2 || !strings.EqualFold(words[0], "CREATE") {
                return false
        }
        rest := words[1:]
        // Skip the optional OR REPLACE clause when a further token follows it.
        if len(rest) >= 3 && strings.EqualFold(rest[0], "OR") && strings.EqualFold(rest[1], "REPLACE") {
                rest = rest[2:]
        }
        const keyword = "MODEL"
        head := rest[0]
        return len(head) >= len(keyword) && strings.EqualFold(head[:len(keyword)], keyword)
}

package handlers

import (
        "net/http"
        "os"
)

// Wire constants for the projects handlers. The `kind` values are
// described in docs/bigquery/docs/reference/rest/v2/projects/list.md
// and
// docs/bigquery/docs/reference/rest/v2/projects/getServiceAccount.md.
const (
        // projectKind is the `kind` of a single Project resource.
        projectKind = "bigquery#project"

        // projectListKind is the `kind` of a ProjectList response.
        projectListKind = "bigquery#projectList"

        // serviceAccountKind is the `kind` of a
        // GetServiceAccountResponse.
        serviceAccountKind = "bigquery#getServiceAccountResponse"

        // defaultProjectEnvVar names the environment variable clients
        // may set to override the synthetic project ID returned by
        // projects.list.
        defaultProjectEnvVar = "BIGQUERY_EMULATOR_PROJECT"

        // defaultProjectID is the synthetic project ID returned by
        // projects.list when defaultProjectEnvVar is unset. It is the
        // conventional emulator project ID used by BigQuery
        // client-library sample code, so callers that never configure
        // a project still get something predictable on the wire.
        defaultProjectID = "test-project"
)

// defaultProjectIDFromEnv resolves the synthetic project ID used by
// projects.list: the value of BIGQUERY_EMULATOR_PROJECT when it is
// set to a non-empty string, defaultProjectID otherwise. The
// environment is consulted on every call, so the variable can be
// changed without restarting the gateway.
func defaultProjectIDFromEnv() string {
        projectID := os.Getenv(defaultProjectEnvVar)
        if projectID == "" {
                projectID = defaultProjectID
        }
        return projectID
}

// projectResource is the per-entry shape inside a ProjectList. The
// fields mirror docs/bigquery/docs/reference/rest/v2/projects/list.md.
// NumericID and FriendlyName are omitted from the JSON when empty;
// ProjectList leaves both unset.
type projectResource struct {
        Kind             string           `json:"kind"`
        ID               string           `json:"id"`
        NumericID        string           `json:"numericId,omitempty"`
        ProjectReference projectReference `json:"projectReference"`
        FriendlyName     string           `json:"friendlyName,omitempty"`
}

// projectReference is BigQuery's stable handle to a project (mirrors
// the `ProjectReference` resource referenced by projects.list). It
// carries only the project ID.
type projectReference struct {
        ProjectID string `json:"projectId"`
}

// ProjectList implements `bigquery.projects.list`:
//
//        GET /bigquery/v2/projects
//
// The emulator does not model IAM, so the response always contains
// exactly one synthetic project whose ID comes from
// defaultProjectIDFromEnv (BIGQUERY_EMULATOR_PROJECT if set,
// otherwise `test-project`), with a total item count of 1. The shape
// follows docs/bigquery/docs/reference/rest/v2/projects/list.md so
// client libraries can iterate without special-casing the emulator.
func ProjectList(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, _ *http.Request) {
                id := defaultProjectIDFromEnv()
                only := projectResource{
                        Kind:             projectKind,
                        ID:               id,
                        ProjectReference: projectReference{ProjectID: id},
                }
                payload := map[string]any{
                        resourceKeyKind:       projectListKind,
                        resourceKeyProjects:   []projectResource{only},
                        resourceKeyTotalItems: 1,
                }
                writeJSON(w, http.StatusOK, payload)
        }
}

// ProjectGetServiceAccount implements `bigquery.projects.getServiceAccount`:
//
//        GET /bigquery/v2/projects/{projectId}/serviceAccount
//
// Real BigQuery returns the per-project Google-managed service account
// used for KMS interactions. The emulator answers with a synthetic
// address, `bigquery-emulator@<projectId>.iam.gserviceaccount.com`,
// so client libraries that probe this endpoint at startup don't fail.
// The project ID comes from the path; when that capture is empty the
// value of defaultProjectIDFromEnv is used instead.
//
// Note: there is no `GET /bigquery/v2/projects/{projectId}` endpoint in
// the public API; this is the endpoint clients actually probe.
func ProjectGetServiceAccount(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                id := r.PathValue("projectId")
                if id == "" {
                        id = defaultProjectIDFromEnv()
                }
                email := "bigquery-emulator@" + id + ".iam.gserviceaccount.com"
                payload := map[string]any{
                        resourceKeyKind: serviceAccountKind,
                        "email":         email,
                }
                writeJSON(w, http.StatusOK, payload)
        }
}

package handlers

import (
        "encoding/json"
        "errors"
        "io"
        "net/http"
        "strconv"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/middleware"
        "github.com/vantaboard/bigquery-emulator/gateway/query"
)

// queryResponseKind is the `kind` value the BigQuery REST API returns
// on a QueryResponse resource. See
// docs/bigquery/docs/reference/rest/v2/jobs/query.md.
const queryResponseKind = "bigquery#queryResponse"

// principalEmailFromContext returns the email of the principal that
// the middleware attached to the request context. When no principal
// is present, or its email is empty, the fixed fallback address
// "emulator@bigquery.local" is returned.
func principalEmailFromContext(r *http.Request) string {
        principal, found := middleware.PrincipalFromContext(r.Context())
        if !found || principal.Email == "" {
                return "emulator@bigquery.local"
        }
        return principal.Email
}

// statementTypeSelect is the statement type the engine reports for
// read queries. It is a package constant so that goconst does not
// flag the literal being repeated across handlers.
const statementTypeSelect = "SELECT"

// QueryRun implements `bigquery.jobs.query`, the synchronous query API:
//
//        POST /bigquery/v2/projects/{projectId}/queries
//
// The request body is decoded as a bqtypes.QueryRequest. An empty body
// is accepted and leaves the request zero-valued; a body that cannot
// be read or is not valid JSON is rejected with a 400 `invalid` error
// envelope.
//
// The decoded request is then routed on its `dryRun` flag:
//
//   - dryRun=true goes to runQueryDryRun, which asks the engine's
//     `enginepb.Query.DryRun` RPC for the output schema plus an
//     estimated bytes-processed figure and answers with
//     `jobComplete=true` and no rows.
//   - dryRun=false (or unset) goes to runQueryExecute, which drains the
//     server-streaming `enginepb.Query.ExecuteQuery` RPC, converts the
//     rows to their REST shape, and records a completed job in
//     `deps.Jobs` so the returned `jobReference` can be looked up by a
//     later request.
//
// SQL dialect: both branches treat an unset `useLegacySql` exactly like
// `useLegacySql=false`. Only an explicit true is passed on to
// query.PrepareEngineSQL as "legacy", and the request forwarded to the
// engine always carries UseLegacySql=false.
//
// NOTE(review): the upstream API documents 15-minute `requestId`
// idempotency; nothing in this handler or its two branches reads that
// field directly -- confirm whether it is honored elsewhere.
func QueryRun(deps Dependencies) http.HandlerFunc {
        // A zero-valued Dependencies (as used by unit tests) gets its own
        // private job registry; the server wiring is expected to inject a
        // shared one so jobs outlive a single request.
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }

        return func(w http.ResponseWriter, r *http.Request) {
                raw, readErr := io.ReadAll(r.Body)
                if readErr != nil {
                        writeError(w, http.StatusBadRequest, "invalid",
                                "Could not read query request body: "+readErr.Error())
                        return
                }

                var req bqtypes.QueryRequest
                if len(raw) != 0 {
                        if parseErr := json.Unmarshal(raw, &req); parseErr != nil {
                                writeError(w, http.StatusBadRequest, "invalid",
                                        "Could not parse query request body as JSON: "+parseErr.Error())
                                return
                        }
                }

                // Both branches share one signature, so pick the branch
                // first and invoke it once.
                handle := runQueryExecute
                if req.DryRun {
                        handle = runQueryDryRun
                }
                handle(deps, w, r, &req)
        }
}

// runQueryDryRun serves the dryRun=true branch of QueryRun.
//
// It prepares the SQL (external table definitions, optional legacy-SQL
// handling via query.PrepareEngineSQL), sends it to the engine's
// `enginepb.Query.DryRun` RPC, and turns the reply into a
// `QueryResponse` that carries the resolved schema, the estimated
// bytes processed, `jobComplete=true`, and no rows.
//
// Every failure path writes its own error envelope through the
// corresponding helper and returns without writing anything else.
//
// When `deps.Query` is nil the request is answered by NotImplemented
// rather than dereferencing the missing engine client.
func runQueryDryRun(deps Dependencies, w http.ResponseWriter, r *http.Request,
        req *bqtypes.QueryRequest,
) {
        if deps.Query == nil {
                NotImplemented(w, r)
                return
        }
        ctx := r.Context()
        projectID := r.PathValue("projectId")

        // Start from the dataset named in the request (or the server-level
        // default) and let the external-table preparation step have the
        // final say on which dataset id the engine should see. Only the
        // dataset id travels in this field; the project always comes from
        // the URL.
        requested := resolveDefaultDataset(deps, req.DefaultDataset)
        dataset, extErr := prepareQueryExternalTables(
                ctx, deps, projectID, req.TableDefinitions, requested)
        if writeExternalTableError(w, extErr) {
                return
        }

        // An unset useLegacySql counts as false.
        legacy := req.UseLegacySQL != nil && *req.UseLegacySQL
        engineSQL, sqlErr := query.PrepareEngineSQL(legacy, req.Query, projectID, dataset)
        if writeLegacySQLError(w, sqlErr) {
                return
        }

        resp, err := deps.Query.DryRun(ctx, &enginepb.QueryRequest{
                ProjectId:        projectID,
                DefaultDatasetId: dataset,
                Sql:              engineSQL,
                UseLegacySql:     false,
                Parameters:       parametersToEngineMap(req.Parameters),
                PrincipalEmail:   principalEmailFromContext(r),
        })
        if queryGRPCToHTTPError(w, err) {
                return
        }

        writeJSON(w, http.StatusOK, bqtypes.QueryResponse{
                Kind:                queryResponseKind,
                Schema:              schemaFromProto(resp.GetSchema()),
                TotalBytesProcessed: formatDryRunBytes(resp.GetEstimatedBytesProcessed()),
                JobComplete:         true,
        })
}

// formatDryRunBytes converts the engine's estimated byte count into the
// string form used for `totalBytesProcessed` on dry-run responses. The
// formatting itself is delegated to jobs.FormatDryRunBytesProcessed.
func formatDryRunBytes(estimated int64) string {
        rendered := jobs.FormatDryRunBytesProcessed(estimated)
        return rendered
}

// runQueryExecute handles the dryRun=false branch of QueryRun. It
// forwards the SQL to the engine's server-streaming
// `enginepb.Query.ExecuteQuery` RPC, drains the stream through
// streamQueryResults (which converts each row with
// `bqtypes.CellsToRowForSchema`), and stamps the resulting
// `QueryResponse` with the jobReference of a completed job recorded in
// `deps.Jobs`.
//
// Before the engine is contacted, three kinds of request are peeled
// off or rejected:
//
//   - SQL recognized by parseAbortSessionSQL is answered by
//     handleAbortSessionQuery;
//   - a request for which queryDefaultDatasetForExecute reports !ok is
//     abandoned (NOTE(review): presumably that helper has already
//     written the error envelope -- confirm);
//   - multi-statement scripts are delegated to runQueryScriptExecute.
//
// After a successful drain the handler appends results to any
// destination named in the request, persists routine / model / view
// DDL side effects, records the job, optionally materializes an
// implicit destination table for SELECT results, and writes the
// response.
//
// Stream contract: see streamQueryResults. The schema is taken from
// the first message that carries one; later schema messages are
// ignored, and a message carrying none of the recognized fields
// contributes a row built from its (possibly empty) cells.
//
// When `deps.Query` is nil (the gateway was started without an
// engine subprocess), the handler degrades to the structured 501
// stub the rest of the route table uses; unit-mode runs (`task
// emulator:run --engine_binary=""`) keep returning a BigQuery-
// shaped error envelope instead of a panic.
//
//nolint:funlen // engine stream drain + session/DDL stamping in one handler
func runQueryExecute(deps Dependencies, w http.ResponseWriter, r *http.Request,
        req *bqtypes.QueryRequest,
) {
        if deps.Query == nil {
                NotImplemented(w, r)
                return
        }
        projectID := r.PathValue("projectId")

        // Session-abort statements never reach the engine.
        if parseAbortSessionSQL(req.Query) {
                handleAbortSessionQuery(deps, w, projectID, req.Location, req.ConnProperties)
                return
        }

        defaultDataset, ok := queryDefaultDatasetForExecute(deps, w, r, projectID, req)
        if !ok {
                return
        }

        // Scripts take their own execution path.
        if isMultiStatementScript(req.Query) {
                runQueryScriptExecute(deps, w, r, req, defaultDataset)
                return
        }

        // SQL preparation pipeline. Order matters: parameters are expanded
        // into the SQL text first, then the parameter list is reduced
        // against the expanded text, and only then is the SQL handed to the
        // legacy-SQL and job-aware preparation steps.
        // NOTE(review): judging by the helper names, array parameters are
        // inlined into the SQL and removed from the bound list -- confirm
        // against expandQueryParamsInSQL / stripExpandedArrayParams.
        useLegacy := req.UseLegacySQL != nil && *req.UseLegacySQL
        sql := expandQueryParamsInSQL(req.Query, req.Parameters)
        bindParams := stripExpandedArrayParams(req.Query, sql, req.Parameters)
        sql, sqlErr := query.PrepareEngineSQL(useLegacy, sql, projectID, defaultDataset)
        if writeLegacySQLError(w, sqlErr) {
                return
        }
        sql, sqlErr = query.PrepareEngineSQLForJobs(r.Context(), deps.Catalog, deps.Jobs, projectID, sql)
        if sqlErr != nil {
                writeError(w, http.StatusBadRequest, reasonInvalidQuery, sqlErr.Error())
                return
        }
        // The engine always receives UseLegacySql=false; only the
        // remaining (non-expanded) parameters are bound.
        engineReq := &enginepb.QueryRequest{
                ProjectId:        projectID,
                DefaultDatasetId: defaultDataset,
                Sql:              sql,
                UseLegacySql:     false,
                Parameters:       parametersToEngineMap(bindParams),
                PrincipalEmail:   principalEmailFromContext(r),
        }

        // start/end bracket the RPC call plus the full stream drain; they
        // become the job's start and end timestamps below.
        start := time.Now().UTC()
        stream, err := deps.Query.ExecuteQuery(r.Context(), engineReq)
        if queryGRPCToHTTPError(w, err) {
                return
        }
        schema, dmlStats, rows, statementType, emulatorRoute, emulatorPhases, ok := streamQueryResults(w, stream)
        if !ok {
                return
        }
        end := time.Now().UTC()

        // Convert the schema to its REST shape once, then let the query
        // package append the rows to whatever destination the request
        // names; a failure there is reported as an invalid query.
        restSchema := schemaFromProto(schema)
        if err := query.AppendResultsFromQueryRequest(
                r.Context(), deps.Catalog, req, projectID, restSchema, rows); err != nil {
                writeError(w, http.StatusBadRequest, reasonInvalidQuery, err.Error())
                return
        }
        restDmlStats := dmlStatsFromProto(dmlStats)
        // DDL side effects: routine-creating statements are persisted and
        // their reference is echoed back as ddlTargetRoutine; model and
        // view DDL are handled by their own helpers. All of them work from
        // the original request SQL, not the prepared engine SQL.
        var ddlTarget *bqtypes.RoutineReference
        if statementType == "CREATE_FUNCTION" || statementType == "CREATE_PROCEDURE" ||
                statementType == "CREATE_TABLE_FUNCTION" {
                ddlTarget = persistRoutineFromDDL(
                        r.Context(), &deps, projectID, defaultDataset, req.Query)
        }
        if isCreateModelSQL(req.Query) {
                persistModelFromDDL(r.Context(), &deps, projectID, defaultDataset, req.Query)
        }
        handleViewDDLAfterQuery(&deps, projectID, defaultDataset, req.Query, statementType)
        // The cached result keeps the unfiltered emulatorRoute / phases;
        // loopback gating is applied separately for each response.
        result := &jobs.QueryResult{
                Schema:           restSchema,
                Rows:             rows,
                DmlStats:         restDmlStats,
                StatementType:    statementType,
                EmulatorRoute:    emulatorRoute,
                EmulatorPhases:   emulatorPhases,
                DdlTargetRoutine: ddlTarget,
        }
        sessionInfo := sessionStore(&deps).Resolve(
                projectID, req.Location, req.CreateSession, req.ConnProperties)
        // Record the completed job (with its rows + schema cached)
        // before assembling the response so the jobReference we emit
        // is the same one a later jobs.get / jobs.getQueryResults will
        // find. The current registry does not track engine-side
        // bytes-processed yet, so we stamp 0; the long-running-jobs
        // follow-up wires the real metric.
        job := deps.Jobs.CompleteQueryWithResult(
                projectID, req.Location, 0, start, end, result)
        job.UserEmail = principalEmailFromContext(r)
        // Non-empty results of a SELECT (or of a statement whose type the
        // engine did not report) are materialized into an implicit
        // destination table. This is best-effort: when materialization
        // fails the error is dropped and the job simply carries no
        // configuration.
        if deps.Catalog != nil && len(rows) > 0 &&
                (statementType == "" || statementType == statementTypeSelect) {
                if dest, err := query.MaterializeImplicitDestination(
                        r.Context(), deps.Catalog, projectID, defaultDataset,
                        job.JobReference.JobID, restSchema, rows); err == nil {
                        job.Configuration = &jobs.JobConfiguration{
                                JobType: jobConfigurationKindQuery,
                                Query: &jobs.JobConfigurationQuery{
                                        Query:            req.Query,
                                        DestinationTable: dest,
                                },
                        }
                }
        }
        stampJobSessionInfo(job, sessionInfo)
        // Surface the `emulatorRoute` debug field only to loopback
        // callers so external BigQuery client libraries pointed at the
        // emulator see the same JSON shape they would against the
        // public REST surface. Non-loopback callers get an empty
        // string, which `assembleQueryResponse` translates into "no
        // emulatorRoute property" because the JSON struct tag is
        // `omitempty`. The phase timings are gated the same way. See
        // `docs/ENGINE_POLICY.md`.
        visibleRoute := ""
        visiblePhases := map[string]int64(nil)
        if middleware.IsLoopback(r.Context()) {
                visibleRoute = emulatorRoute
                visiblePhases = emulatorPhases
        }
        out := assembleQueryResponse(
                job, restSchema, rows, dmlStats, restDmlStats, statementType,
                visibleRoute, visiblePhases, ddlTarget, sessionInfo)
        writeJSON(w, http.StatusOK, out)
}

// positionalParameterMapKeyPrefix is the prefix of the proto map key
// synthesized for a positional query parameter (a REST entry with an
// empty `name`). The zero-based positional index is appended to it,
// giving keys such as `__pos_0`. The prefix is chosen so the key does
// not collide with legitimate named parameters such as `@p0`.
const positionalParameterMapKeyPrefix = "__pos_"

// parametersToEngineMap converts the REST-side list of query
// parameters into the name-keyed `map<string, QueryParameter>` the
// engine request carries.
//
// Each REST entry has a `name`, a `parameterType`, and a
// `parameterValue`; the engine-side value is a type kind / type JSON
// pair plus a JSON-encoded value, produced by bqtypes.ParameterTypeWire
// and bqtypes.ParameterValueWire respectively.
//
//   - Named parameters keep their name as the map key.
//   - Positional parameters (empty name) get a synthetic key built from
//     positionalParameterMapKeyPrefix and a running index (`__pos_0`,
//     `__pos_1`, ...). The index only advances for positional entries
//     that are actually emitted.
//   - Entries without a `parameterType` are skipped entirely.
//   - An entry without a `parameterValue` is emitted with an empty
//     ValueJson.
//
// Returns nil for an empty input list. If every entry is skipped the
// result is an empty, non-nil map.
func parametersToEngineMap(in []bqtypes.QueryParameter) map[string]*enginepb.QueryParameter {
        if len(in) == 0 {
                return nil
        }

        engineParams := make(map[string]*enginepb.QueryParameter, len(in))
        nextPositional := 0
        for i := range in {
                param := &in[i]
                if param.ParameterType == nil {
                        continue
                }

                key := param.Name
                if key == "" {
                        key = positionalParameterMapKeyPrefix + strconv.Itoa(nextPositional)
                        nextPositional++
                }

                entry := &enginepb.QueryParameter{}
                entry.TypeKind, entry.TypeJson = bqtypes.ParameterTypeWire(param.ParameterType)
                if param.ParameterValue != nil {
                        entry.ValueJson = bqtypes.ParameterValueWire(param.ParameterType, param.ParameterValue)
                }
                engineParams[key] = entry
        }
        return engineParams
}

// streamQueryResults drains the engine's query stream and returns, in
// order: the schema, the DML stats, the collected rows, the statement
// type, the emulator route, the emulator phase timings, and an ok
// flag.
//
// ok=false means a receive error was already turned into an HTTP error
// envelope by queryGRPCToHTTPError; every other return value is then
// zero and the caller must stop processing the request.
//
// Each received message is classified by the first of these fields it
// carries, checked in this order:
//
//  1. schema          -- first one wins, later ones are ignored
//  2. dml_stats       -- first one wins, later ones are ignored
//  3. statement_type  -- first non-empty value wins
//  4. emulator_route  -- first non-empty value wins
//  5. phase_timings   -- named phases are merged into one map; a later
//     entry for the same name overwrites the earlier duration
//  6. anything else   -- treated as a row and converted with
//     bqtypes.CellsToRowForSchema against the schema seen so far
//
// A message is consumed by exactly one of these cases, so a message
// that carries e.g. both a schema and cells contributes no row. The
// loopback gating of emulator_route / phase timings is the caller's
// job; this function is a plain collector.
//
// The row slice is always non-nil, even when no rows arrived.
func streamQueryResults(w http.ResponseWriter, stream enginepb.Query_ExecuteQueryClient) (
        *enginepb.TableSchema, *enginepb.DmlStats, []bqtypes.Row, string, string, map[string]int64, bool,
) {
        var (
                schema   *enginepb.TableSchema
                stats    *enginepb.DmlStats
                stmtType string
                route    string
                phases   map[string]int64
        )
        rows := []bqtypes.Row{}

        for {
                msg, err := stream.Recv()
                if errors.Is(err, io.EOF) {
                        return schema, stats, rows, stmtType, route, phases, true
                }
                if queryGRPCToHTTPError(w, err) {
                        return nil, nil, nil, "", "", nil, false
                }

                timings := msg.GetPhaseTimings()
                switch {
                case msg.GetSchema() != nil:
                        if schema == nil {
                                schema = msg.GetSchema()
                        }
                case msg.GetDmlStats() != nil:
                        if stats == nil {
                                stats = msg.GetDmlStats()
                        }
                case msg.GetStatementType() != "":
                        if stmtType == "" {
                                stmtType = msg.GetStatementType()
                        }
                case msg.GetEmulatorRoute() != "":
                        if route == "" {
                                route = msg.GetEmulatorRoute()
                        }
                case timings != nil && len(timings.GetPhases()) > 0:
                        if phases == nil {
                                phases = make(map[string]int64, len(timings.GetPhases()))
                        }
                        for _, p := range timings.GetPhases() {
                                // Unnamed phases cannot be keyed and are dropped.
                                if name := p.GetName(); name != "" {
                                        phases[name] = p.GetDurationUs()
                                }
                        }
                default:
                        rows = append(rows, bqtypes.CellsToRowForSchema(msg.GetCells(), schema))
                }
        }
}

// dmlStatsFromProto maps the engine's DmlStats message onto the REST
// envelope, rendering each row count as a base-10 string. A nil input
// (the stream never carried a DmlStats message) yields nil.
func dmlStatsFromProto(d *enginepb.DmlStats) *bqtypes.DmlStats {
        if d == nil {
                return nil
        }
        decimal := func(n int64) string { return strconv.FormatInt(n, 10) }

        out := new(bqtypes.DmlStats)
        out.InsertedRowCount = decimal(d.GetInsertedRowCount())
        out.UpdatedRowCount = decimal(d.GetUpdatedRowCount())
        out.DeletedRowCount = decimal(d.GetDeletedRowCount())
        return out
}

// assembleQueryResponse builds the synchronous jobs.query response
// envelope for a completed job.
//
// The base envelope is SELECT-shaped: schema, rows, totalRows, the
// job reference, and the byte / timestamp statistics copied from the
// job. On top of that:
//
//   - sessionInfo, when non-nil, is set on the response and on its
//     statistics.
//   - `statistics.query` is attached when at least one of
//     statementType, emulatorRoute, emulatorPhases, or
//     ddlTargetRoutine is set. emulatorRoute and emulatorPhases are
//     emitted as given; the caller is responsible for having blanked
//     them for non-loopback requests.
//   - When restDmlStats is non-nil the DML envelope is added:
//     `dmlStats` plus `numDmlAffectedRows`, the sum of the inserted,
//     updated and deleted counts read from the proto dmlStats. If the
//     statement produced no rows the schema and rows are dropped and
//     totalRows is forced to "0"; a DML statement that did return rows
//     keeps them alongside the stats.
func assembleQueryResponse(job *jobs.Job, restSchema *bqtypes.TableSchema, rows []bqtypes.Row,
        dmlStats *enginepb.DmlStats, restDmlStats *bqtypes.DmlStats,
        statementType string,
        emulatorRoute string,
        emulatorPhases map[string]int64,
        ddlTargetRoutine *bqtypes.RoutineReference,
        sessionInfo *bqtypes.SessionInfo,
) bqtypes.QueryResponse {
        ref := job.JobReference
        jobStats := job.Statistics

        var resp bqtypes.QueryResponse
        resp.Kind = queryResponseKind
        resp.Schema = restSchema
        resp.JobReference = &ref
        resp.JobComplete = true
        resp.TotalRows = strconv.FormatUint(uint64(len(rows)), 10)
        resp.Rows = rows
        resp.TotalBytesProcessed = jobStats.TotalBytesProcessed
        resp.CreationTime = jobStats.CreationTime
        resp.StartTime = jobStats.StartTime
        resp.EndTime = jobStats.EndTime
        resp.Location = ref.Location

        hasSession := sessionInfo != nil
        if hasSession {
                resp.SessionInfo = sessionInfo
        }

        hasQueryStats := statementType != "" || emulatorRoute != "" ||
                len(emulatorPhases) > 0 || ddlTargetRoutine != nil
        if hasSession || hasQueryStats {
                statistics := &bqtypes.JobStatistics{SessionInfo: sessionInfo}
                if hasQueryStats {
                        statistics.Query = &bqtypes.JobStatistics2{
                                StatementType:    statementType,
                                EmulatorRoute:    emulatorRoute,
                                EmulatorPhases:   emulatorPhases,
                                DdlTargetRoutine: ddlTargetRoutine,
                        }
                }
                resp.Statistics = statistics
        }

        if restDmlStats == nil {
                return resp
        }

        // `numDmlAffectedRows` is the legacy aggregate that older client
        // libraries still read next to the per-operation `dmlStats`.
        affected := dmlStats.GetInsertedRowCount() +
                dmlStats.GetUpdatedRowCount() +
                dmlStats.GetDeletedRowCount()
        resp.DmlStats = restDmlStats
        resp.NumDmlAffectedRows = strconv.FormatInt(affected, 10)
        if len(rows) == 0 {
                // Plain DML: no result set to report.
                resp.Schema = nil
                resp.Rows = nil
                resp.TotalRows = "0"
        }
        return resp
}

// getQueryResultsKind is the value the BigQuery REST API returns for
// the `kind` field of a GetQueryResultsResponse resource. See
// docs/bigquery/docs/reference/rest/v2/jobs/getQueryResults.md.
//
// It is stamped on the envelope built for the jobs.getQueryResults
// handler in this package.
const getQueryResultsKind = "bigquery#getQueryResultsResponse"

// QueryGetResults implements `bigquery.jobs.getQueryResults`:
//
//        GET /bigquery/v2/projects/{projectId}/queries/{jobId}
//
// It replays the rows and schema that the job registry cached for a
// previously recorded query job. The whole result set lives in memory
// on the job; this endpoint serves slices of it.
//
// Query parameters read while serving the request:
//
//   - `location`: when both it and the stored job's location are
//     non-empty and differ, the request is answered with 404 notFound.
//   - `startIndex`, `maxResults`, `pageToken`: forwarded to
//     paginateResults, which selects the page of cached rows and the
//     `pageToken` (if any) returned with it.
//
// No other query parameter (for example `timeoutMs` or
// `formatOptions`) is read by this handler.
//
// A job id that is unknown to the registry, or that belongs to a
// different project than the one in the URL, yields 404 notFound
// rather than a permission error.
func QueryGetResults(deps Dependencies) http.HandlerFunc {
        // Same fallback as QueryRun: a zero-valued Dependencies gets a
        // private registry so the handler never dereferences nil.
        if deps.Jobs == nil {
                deps.Jobs = jobs.NewRegistry()
        }

        return func(w http.ResponseWriter, r *http.Request) {
                projectID, jobID := r.PathValue("projectId"), r.PathValue("jobId")
                notFoundMsg := "Not found: Job " + projectID + ":" + jobID

                job, found := deps.Jobs.Get(jobID)
                if !found || job.JobReference.ProjectID != projectID {
                        writeError(w, http.StatusNotFound, "notFound", notFoundMsg)
                        return
                }

                wantLocation := r.URL.Query().Get("location")
                haveLocation := job.JobReference.Location
                if wantLocation != "" && haveLocation != "" && wantLocation != haveLocation {
                        writeError(w, http.StatusNotFound, "notFound",
                                notFoundMsg+" in location "+wantLocation)
                        return
                }

                writeJSON(w, http.StatusOK, assembleGetQueryResultsResponse(r, job))
        }
}

// assembleGetQueryResultsResponse builds the JSON envelope that
// QueryGetResults writes for a stored job.
//
// The cached result fields are read through queryResultFields, the
// requested page is cut by paginateResults from the request's query
// string, and `totalRows` always reports the size of the full cached
// result set rather than of the page. Statistics come from
// getQueryResultsStatistics, which applies the loopback gating for the
// emulator debug fields. Session info stored on the job is echoed on
// the response, and when the job carries DML stats the envelope is
// post-processed by applyDmlStatsToGetQueryResults.
func assembleGetQueryResultsResponse(r *http.Request, job *jobs.Job) bqtypes.QueryResponse {
        schema, cachedRows, dml, stmtType, route, phases, routine := queryResultFields(job)
        page, nextToken := paginateResults(cachedRows, r.URL.Query())

        ref := job.JobReference
        session := job.Statistics.SessionInfo

        resp := bqtypes.QueryResponse{
                Kind:                getQueryResultsKind,
                Schema:              schema,
                JobReference:        &ref,
                JobComplete:         true,
                TotalRows:           strconv.FormatUint(uint64(len(cachedRows)), 10),
                Rows:                page,
                PageToken:           nextToken,
                TotalBytesProcessed: job.Statistics.TotalBytesProcessed,
                Location:            ref.Location,
        }
        resp.Statistics = getQueryResultsStatistics(r, stmtType, route, phases, routine, session)
        if session != nil {
                resp.SessionInfo = session
        }
        if dml != nil {
                applyDmlStatsToGetQueryResults(&resp, dml)
        }
        return resp
}

package handlers

import (
        "net/http"
        "net/url"
        "strconv"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/middleware"
)

// queryResultFields unpacks the cached query result stored on a job
// into its individual fields. When the job has no cached result every
// returned value is the zero value of its type.
func queryResultFields(job *jobs.Job) (
        schema *bqtypes.TableSchema,
        allRows []bqtypes.Row,
        dmlStats *bqtypes.DmlStats,
        statementType string,
        emulatorRoute string,
        emulatorPhases map[string]int64,
        ddlTargetRoutine *bqtypes.RoutineReference,
) {
        cached := job.Result
        if cached == nil {
                return nil, nil, nil, "", "", nil, nil
        }
        return cached.Schema,
                cached.Rows,
                cached.DmlStats,
                cached.StatementType,
                cached.EmulatorRoute,
                cached.EmulatorPhases,
                cached.DdlTargetRoutine
}

// getQueryResultsStatistics builds the `statistics` object for a
// jobs.getQueryResults response.
//
// emulatorRoute and emulatorPhases are debug fields: they are only
// passed through when middleware.IsLoopback reports true for the
// request context and are blanked otherwise. After that gating:
//
//   - nil is returned when there is nothing to report (no statement
//     type, no visible route or phases, no DDL target routine, and no
//     session info);
//   - otherwise the statistics carry sessionInfo, and a `query`
//     sub-object is attached only when at least one of the query-level
//     fields is set.
func getQueryResultsStatistics(
        r *http.Request,
        statementType string,
        emulatorRoute string,
        emulatorPhases map[string]int64,
        ddlTargetRoutine *bqtypes.RoutineReference,
        sessionInfo *bqtypes.SessionInfo,
) *bqtypes.JobStatistics {
        if !middleware.IsLoopback(r.Context()) {
                emulatorRoute = ""
                emulatorPhases = nil
        }

        hasQueryStats := statementType != "" || emulatorRoute != "" ||
                len(emulatorPhases) > 0 || ddlTargetRoutine != nil
        if !hasQueryStats && sessionInfo == nil {
                return nil
        }

        statistics := &bqtypes.JobStatistics{SessionInfo: sessionInfo}
        if hasQueryStats {
                statistics.Query = &bqtypes.JobStatistics2{
                        StatementType:    statementType,
                        EmulatorRoute:    emulatorRoute,
                        EmulatorPhases:   emulatorPhases,
                        DdlTargetRoutine: ddlTargetRoutine,
                }
        }
        return statistics
}

// applyDmlStatsToGetQueryResults adds the DML envelope to a
// jobs.getQueryResults response: `dmlStats` itself plus
// `numDmlAffectedRows`, the sum of the inserted, updated and deleted
// row counts.
//
// The schema / rows / totalRows fields are only reset when the cached
// result set is empty (out.TotalRows is unset or "0"). This matches
// assembleQueryResponse, which keeps schema and rows when a DML
// statement returned rows; clearing them unconditionally here would
// make a replay through jobs.getQueryResults lose rows that the
// synchronous jobs.query response had delivered. The decision is based
// on the total row count rather than on the current page so that an
// empty page of a non-empty result set keeps its schema and totalRows.
//
// out must be non-nil and is modified in place; dmlStats must be
// non-nil.
func applyDmlStatsToGetQueryResults(out *bqtypes.QueryResponse, dmlStats *bqtypes.DmlStats) {
        // The counts are decimal strings. A value that does not parse
        // contributes 0 to the aggregate instead of failing the request.
        parseCount := func(s string) int64 {
                n, err := strconv.ParseInt(s, 10, 64)
                if err != nil {
                        return 0
                }
                return n
        }

        out.DmlStats = dmlStats
        affected := parseCount(dmlStats.InsertedRowCount) +
                parseCount(dmlStats.UpdatedRowCount) +
                parseCount(dmlStats.DeletedRowCount)
        out.NumDmlAffectedRows = strconv.FormatInt(affected, 10)

        if out.TotalRows == "" || out.TotalRows == "0" {
                // Plain DML: there is no result set to report.
                out.Schema = nil
                out.Rows = nil
                out.TotalRows = "0"
        }
}

// defaultQueryResultsPageSize mirrors BigQuery's documented default
// `maxResults` for jobs.getQueryResults when the caller omits it.
// paginateResults also falls back to it when `maxResults` is present
// but cannot be parsed as an unsigned integer.
const defaultQueryResultsPageSize uint64 = 10000

// paginateResults selects one page from the cached query rows based on
// the `startIndex`, `maxResults`, and `pageToken` query parameters and
// returns the page together with the token for the following page.
//
// Parameters:
//
//   - `startIndex`: zero-based index of the first row; defaults to 0
//     when missing or unparsable.
//   - `pageToken`: a decimal row offset, as minted by this function.
//     When set and valid it takes precedence over `startIndex`; when
//     set but not a valid unsigned integer the page is empty.
//   - `maxResults`: page size; defaults to
//     defaultQueryResultsPageSize when missing or unparsable.
//     maxResults=0 means "wait for completion, return zero rows"
//     (browseTable sample); no pageToken is minted in that case, since
//     a client that follows tokens would otherwise poll forever.
//
// Returns (nil, "") whenever the page is empty. The returned token is
// the decimal index of the first row after the page and is empty when
// the page reaches the end of the result set.
//
// The end of the page is computed without ever adding `start` and
// `limit` directly: both are caller-controlled uint64 values, and
// their sum can wrap around (e.g. startIndex=1 with
// maxResults=18446744073709551615), which would produce an end index
// below the start index and panic in the slice expression.
func paginateResults(allRows []bqtypes.Row, q url.Values) ([]bqtypes.Row, string) {
        total := uint64(len(allRows))

        start := parseUintQuery(q, "startIndex", 0)
        if tok := q.Get("pageToken"); tok != "" {
                off, err := strconv.ParseUint(tok, 10, 64)
                if err != nil {
                        return nil, ""
                }
                start = off
        }

        // parseUintQuery already returns the default for a missing value.
        limit := parseUintQuery(q, "maxResults", defaultQueryResultsPageSize)
        if limit == 0 || start >= total {
                return nil, ""
        }

        // start < total holds here, so total-start cannot underflow, and
        // start+limit is only evaluated when it is known to be < total.
        end := total
        if remaining := total - start; limit < remaining {
                end = start + limit
        }

        var nextToken string
        if end < total {
                nextToken = strconv.FormatUint(end, 10)
        }
        return allRows[start:end], nextToken
}

// parseUintQuery reads the query parameter named key and parses its
// first value as a base-10 unsigned 64-bit integer. defaultVal is
// returned when the parameter is absent, empty, or does not parse
// (including negative and out-of-range values).
func parseUintQuery(q url.Values, key string, defaultVal uint64) uint64 {
        raw := q.Get(key)
        if raw == "" {
                return defaultVal
        }
        if parsed, err := strconv.ParseUint(raw, 10, 64); err == nil {
                return parsed
        }
        return defaultVal
}

package handlers

import (
        "regexp"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
)

// createTempTableDestinationRE matches the
//
//      CREATE TEMP TABLE `_SESSION`.table
//      CREATE TEMP TABLE `_SESSION`.`table`
//
// shapes bigframes emits. Matching is case-insensitive. Capture group 1 is
// the backtick-quoted dataset, group 2 the backtick-quoted table, and group 3
// the unquoted table (the text up to the next whitespace or "(").
var createTempTableDestinationRE = regexp.MustCompile(
        "(?i)CREATE\\s+TEMP\\s+TABLE\\s+`([^`]+)`\\.(?:`([^`]+)`|([^\\s(]+))")

// stampQueryJobDestination sets configuration.query.destinationTable on a
// query job whose statement type is CREATE_TABLE or CREATE_TABLE_AS_SELECT,
// so BigQuery clients (e.g. bigframes
// SessionResourceManager.create_temp_table) can read query_job.destination
// after jobs.insert.
//
// The job is left untouched when it has no query configuration, when the
// statement type is anything else, or when parseCreateTableDestination does
// not find a destination in the SQL text.
func stampQueryJobDestination(projectID string, job *jobs.Job, statementType string) {
        if statementType != "CREATE_TABLE" && statementType != "CREATE_TABLE_AS_SELECT" {
                return
        }
        if job == nil || job.Configuration == nil || job.Configuration.Query == nil {
                return
        }
        query := job.Configuration.Query
        if dest := parseCreateTableDestination(projectID, query.Query); dest != nil {
                query.DestinationTable = dest
        }
}

// parseCreateTableDestination extracts the dataset and table named by the
// first createTempTableDestinationRE match in sql and returns them as a table
// reference under projectID. It returns nil when the statement does not
// match or when either identifier is empty.
func parseCreateTableDestination(projectID, sql string) *bqtypes.TableReference {
        groups := createTempTableDestinationRE.FindStringSubmatch(strings.TrimSpace(sql))
        if len(groups) < 3 {
                return nil
        }
        dataset, table := groups[1], groups[2]
        // The table is captured in group 2 when backtick-quoted and in group 3
        // when it is a bare identifier.
        if table == "" && len(groups) > 3 {
                table = groups[3]
        }
        if dataset == "" || table == "" {
                return nil
        }
        return &bqtypes.TableReference{
                ProjectID: projectID,
                DatasetID: dataset,
                TableID:   table,
        }
}

package handlers

import (
        "fmt"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// expandQueryParamsInSQL applies the gateway-side SQL rewrites for query
// parameters the DuckDB transpiler cannot lower yet (ARRAY IN UNNEST). Named
// array parameters are inlined first, then positional ones.
func expandQueryParamsInSQL(sql string, params []bqtypes.QueryParameter) string {
        return expandPositionalArrayParamsInSQL(expandArrayParamsInSQL(sql, params), params)
}

// expandArrayParamsInSQL rewrites `IN UNNEST(@name)` / `NOT IN UNNEST(@name)`
// filters (and the backtick-quoted `name` spelling) into literal `IN (...)`
// lists for every named ARRAY query parameter in params.
//
// The DuckDB transpiler does not yet lower IN UNNEST(array_param) shapes;
// expanding at the gateway preserves analyzer binding for scalar params while
// unblocking thirdparty array-parameter samples.
//
// Each non-empty array element becomes a single-quoted SQL string literal
// with embedded single quotes doubled. Empty elements are dropped, and a
// parameter with no non-empty elements leaves the SQL unchanged.
func expandArrayParamsInSQL(sql string, params []bqtypes.QueryParameter) string {
        rewritten := sql
        for _, param := range params {
                isNamedArray := param.Name != "" && param.ParameterType != nil &&
                        strings.ToUpper(param.ParameterType.Type) == sqlTypeARRAY
                if !isNamedArray || param.ParameterValue == nil {
                        continue
                }
                literals := make([]string, 0, len(param.ParameterValue.ArrayValues))
                for _, elem := range param.ParameterValue.ArrayValues {
                        if elem.Value != "" {
                                escaped := strings.ReplaceAll(elem.Value, "'", "''")
                                literals = append(literals, fmt.Sprintf("'%s'", escaped))
                        }
                }
                if len(literals) == 0 {
                        continue
                }
                inList := " (" + strings.Join(literals, ", ") + ")"
                refs := []string{"@" + param.Name, "`" + param.Name + "`"}
                // NOT IN is rewritten before IN, and within each keyword the @name
                // spelling before the backtick one.
                for _, keyword := range []string{"NOT IN", "IN"} {
                        for _, ref := range refs {
                                rewritten = strings.ReplaceAll(rewritten,
                                        keyword+" UNNEST("+ref+")", keyword+inList)
                        }
                }
        }
        return rewritten
}

// stripExpandedArrayParams removes ARRAY parameters that expandQueryParamsInSQL
// inlined via IN/NOT IN UNNEST so the engine is not asked to bind them.
//
// originalSQL is the statement before expansion and expandedSQL the statement
// after it. Non-ARRAY parameters are always kept. A named ARRAY parameter is
// dropped only when namedArrayParamWasExpanded reports that its UNNEST
// pattern disappeared. A positional ARRAY parameter is dropped when it has at
// least one non-empty element and an unconsumed `IN UNNEST(?)` placeholder is
// still available in originalSQL; each dropped parameter consumes one
// placeholder.
//
// The returned slice preserves the relative order of the kept parameters.
func stripExpandedArrayParams(
        originalSQL, expandedSQL string,
        params []bqtypes.QueryParameter,
) []bqtypes.QueryParameter {
        if len(params) == 0 {
                return params
        }
        out := make([]bqtypes.QueryParameter, 0, len(params))
        remaining := originalSQL
        for _, p := range params {
                if p.ParameterType == nil ||
                        strings.ToUpper(strings.TrimSpace(p.ParameterType.Type)) != sqlTypeARRAY {
                        out = append(out, p)
                        continue
                }
                if p.Name != "" {
                        if !namedArrayParamWasExpanded(originalSQL, expandedSQL, p.Name) {
                                out = append(out, p)
                        }
                        continue
                }
                // expandPositionalArrayParamsInSQL skips empty elements and leaves
                // the placeholder in place when nothing remains to inline. Mirror
                // that rule here: stripping such a parameter would leave a `?` in
                // the SQL with no parameter bound to it.
                inlinable := false
                if p.ParameterValue != nil {
                        for _, av := range p.ParameterValue.ArrayValues {
                                if av.Value != "" {
                                        inlinable = true
                                        break
                                }
                        }
                }
                if !inlinable || !strings.Contains(remaining, "IN UNNEST(?)") {
                        out = append(out, p)
                        continue
                }
                // Consume one placeholder so the next positional ARRAY parameter is
                // matched against the following occurrence.
                remaining = strings.Replace(remaining, "IN UNNEST(?)", "IN (__expanded__)", 1)
        }
        return out
}

// namedArrayParamWasExpanded reports whether an UNNEST filter over the named
// parameter appears in originalSQL but no longer appears in expandedSQL. Both
// the IN and NOT IN forms are checked, for the @name and the backtick-quoted
// `name` spellings.
func namedArrayParamWasExpanded(originalSQL, expandedSQL, name string) bool {
        refs := [...]string{"@" + name, "`" + name + "`"}
        for _, ref := range refs {
                for _, keyword := range [...]string{"IN", "NOT IN"} {
                        needle := keyword + " UNNEST(" + ref + ")"
                        if !strings.Contains(originalSQL, needle) {
                                continue
                        }
                        if !strings.Contains(expandedSQL, needle) {
                                return true
                        }
                }
        }
        return false
}

// stripExpandedPositionalArrayParams expands sql with expandQueryParamsInSQL
// and then drops the ARRAY parameters that expansion inlined, so engine
// binding indices stay aligned with the remaining ? placeholders.
func stripExpandedPositionalArrayParams(sql string, params []bqtypes.QueryParameter) []bqtypes.QueryParameter {
        expanded := expandQueryParamsInSQL(sql, params)
        return stripExpandedArrayParams(sql, expanded, params)
}

// expandPositionalArrayParamsInSQL inlines positional (unnamed) ARRAY query
// parameters into sql. For each such parameter, in order, the first remaining
// `IN UNNEST(?)` occurrence is replaced by an `IN (...)` list of single-quoted
// string literals built from the parameter's non-empty elements (embedded
// single quotes are doubled). Parameters with no non-empty elements leave
// the SQL unchanged.
func expandPositionalArrayParamsInSQL(sql string, params []bqtypes.QueryParameter) string {
        const placeholder = "IN UNNEST(?)"
        rewritten := sql
        for _, param := range params {
                isPositional := param.Name == "" && param.ParameterType != nil
                if !isPositional ||
                        strings.ToUpper(strings.TrimSpace(param.ParameterType.Type)) != "ARRAY" {
                        continue
                }
                if !strings.Contains(rewritten, placeholder) || param.ParameterValue == nil {
                        continue
                }
                literals := make([]string, 0, len(param.ParameterValue.ArrayValues))
                for _, elem := range param.ParameterValue.ArrayValues {
                        if elem.Value != "" {
                                escaped := strings.ReplaceAll(elem.Value, "'", "''")
                                literals = append(literals, fmt.Sprintf("'%s'", escaped))
                        }
                }
                if len(literals) == 0 {
                        continue
                }
                inList := "IN (" + strings.Join(literals, ", ") + ")"
                rewritten = strings.Replace(rewritten, placeholder, inList, 1)
        }
        return rewritten
}

package handlers

import (
        "encoding/json"
        "io"
        "net/http"
        "strconv"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/routines"
)

// routineListKind is the `kind` field for a routines.list response. See
// docs/bigquery/docs/reference/rest/v2/routines/list.md.
const routineListKind = "bigquery#listRoutinesResponse"

// Defaults applied to a routine whose request body leaves the corresponding
// field empty (see routineResource and RoutineInsert).
const (
        // defaultRoutineType is used when routineType is empty.
        defaultRoutineType     = "SCALAR_FUNCTION"
        // defaultRoutineLanguage is used when language is empty.
        defaultRoutineLanguage = "SQL"
)

// routineStore returns the routine store held by deps. When deps has no store
// yet, a new one is created, saved on deps, and returned.
func routineStore(deps *Dependencies) *routines.Store {
        if deps.Routines != nil {
                return deps.Routines
        }
        deps.Routines = routines.NewStore()
        return deps.Routines
}

func routineIDFromPath(r *http.Request) (projectID, datasetID, routineID string) {
        return r.PathValue("projectId"), r.PathValue("datasetId"), r.PathValue("routineId")
}

// decodeRoutineBody reads the request body and decodes it as a JSON routine.
// An empty body is accepted and yields the zero routine. On a read or parse
// failure it writes a 400 error response and returns false, in which case the
// caller must not write a response of its own.
func decodeRoutineBody(w http.ResponseWriter, r *http.Request) (bqtypes.Routine, bool) {
        var routine bqtypes.Routine
        raw, readErr := io.ReadAll(r.Body)
        if readErr != nil {
                writeError(w, http.StatusBadRequest, reasonInvalid,
                        "Could not read routine request body: "+readErr.Error())
                return routine, false
        }
        if len(raw) > 0 {
                if parseErr := json.Unmarshal(raw, &routine); parseErr != nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid,
                                "Could not parse routine request body as JSON: "+parseErr.Error())
                        return routine, false
                }
        }
        return routine, true
}

// routineResource returns a copy of rt completed for storage and response:
// the routine reference is overwritten with the given IDs, empty routineType
// and language fall back to the package defaults, an empty creationTime is
// set to the current time, lastModifiedTime is always refreshed, and an etag
// is minted when none is present.
func routineResource(projectID, datasetID, routineID string, rt bqtypes.Routine) bqtypes.Routine {
        out := rt
        out.RoutineReference = bqtypes.RoutineReference{
                ProjectID: projectID,
                DatasetID: datasetID,
                RoutineID: routineID,
        }
        if out.RoutineType == "" {
                out.RoutineType = defaultRoutineType
        }
        if out.Language == "" {
                out.Language = defaultRoutineLanguage
        }
        if out.CreationTime == "" {
                out.CreationTime = nowMillis()
        }
        out.LastModifiedTime = nowMillis()
        if out.Etag == "" {
                out.Etag = routines.MintEtag()
        }
        return out
}

// routineListEntry reduces a routine to the subset of fields upstream list
// returns when readMask is unset: etag, reference, type, language, and the
// creation and last-modified times. Every other field is left at its zero
// value.
func routineListEntry(rt bqtypes.Routine) bqtypes.Routine {
        var entry bqtypes.Routine
        entry.Etag = rt.Etag
        entry.RoutineReference = rt.RoutineReference
        entry.RoutineType = rt.RoutineType
        entry.Language = rt.Language
        entry.CreationTime = rt.CreationTime
        entry.LastModifiedTime = rt.LastModifiedTime
        return entry
}

// RoutineList implements `bigquery.routines.list`:
//
//        GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines
//
// Routines come from the merged catalog and in-memory store when the catalog
// is enabled, otherwise from the in-memory store alone; the `filter` query
// parameter is forwarded to both. Each routine is trimmed with
// routineListEntry.
//
// Pagination uses offsets. `pageToken` is the decimal index of the first
// routine to return, as previously handed out in `nextPageToken`; a token
// that is not a non-negative integer yields an empty page. `maxResults` caps
// the page size and is ignored when it is not a non-negative integer.
// `nextPageToken` is emitted only when the page holds at least one routine
// and more routines follow it, so a client that follows tokens always makes
// progress (maxResults=0 returns an empty page with no token).
func RoutineList(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID := r.PathValue("projectId")
                datasetID := r.PathValue("datasetId")
                q := r.URL.Query()
                var all []bqtypes.Routine
                if routineCatalogEnabled(&deps) {
                        all = mergeRoutineSources(r.Context(), &deps, projectID, datasetID, q.Get("filter"))
                } else {
                        all = routineStore(&deps).List(projectID, datasetID, q.Get("filter"))
                }
                items := make([]bqtypes.Routine, 0, len(all))
                for _, rt := range all {
                        items = append(items, routineListEntry(rt))
                }

                // Resolve the start offset from pageToken. Previously the token was
                // minted but never read back, so every follow-up request returned
                // the first page again.
                start := 0
                if tok := q.Get("pageToken"); tok != "" {
                        off, err := strconv.Atoi(tok)
                        if err != nil || off < 0 {
                                off = len(items)
                        }
                        start = min(off, len(items))
                }
                end := len(items)
                if maxResults := q.Get("maxResults"); maxResults != "" {
                        if n, err := strconv.Atoi(maxResults); err == nil && n >= 0 && n < end-start {
                                end = start + n
                        }
                }

                resp := map[string]any{
                        resourceKeyKind: routineListKind,
                        "routines":      items[start:end],
                }
                // Only advertise a next page when this page advanced past start.
                if end > start && end < len(items) {
                        resp["nextPageToken"] = strconv.Itoa(end)
                }
                writeJSON(w, http.StatusOK, resp)
        }
}

// RoutineGet implements `bigquery.routines.get`:
//
//        GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines/{routineId}
//
// It responds with the routine found by routineLookupExisting, or with a 404
// error when there is none.
func RoutineGet(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, routineID := routineIDFromPath(r)
                if rt, found := routineLookupExisting(r.Context(), &deps, projectID, datasetID, routineID); found {
                        writeJSON(w, http.StatusOK, rt)
                        return
                }
                writeError(w, http.StatusNotFound, reasonNotFound,
                        "Not found: Routine "+projectID+":"+datasetID+"."+routineID)
        }
}

// RoutineInsert implements `bigquery.routines.insert`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines
//
// The body must carry routineReference.routineId and definitionBody;
// otherwise a 400 error is returned. With the catalog enabled the routine is
// inserted through catalogInsertRoutine, which writes its own error response
// on conflict or engine failure. Without it the in-memory store decides, and
// a duplicate yields 409. On success the routine is also written to the
// in-memory store and returned.
func RoutineInsert(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID := r.PathValue("projectId"), r.PathValue("datasetId")
                body, ok := decodeRoutineBody(w, r)
                if !ok {
                        return
                }
                routineID := body.RoutineReference.RoutineID
                switch {
                case routineID == "":
                        writeError(w, http.StatusBadRequest, reasonInvalid,
                                "Required routineReference.routineId is missing.")
                        return
                case body.DefinitionBody == "":
                        writeError(w, http.StatusBadRequest, reasonInvalid,
                                "Required definitionBody is missing.")
                        return
                }
                if body.RoutineType == "" {
                        body.RoutineType = defaultRoutineType
                }
                if body.Language == "" {
                        body.Language = defaultRoutineLanguage
                }
                created := routineResource(projectID, datasetID, routineID, body)
                if routineCatalogEnabled(&deps) {
                        // A true result means the error response was already written.
                        if catalogInsertRoutine(r.Context(), w, &deps, projectID, datasetID, routineID, created) {
                                return
                        }
                } else if inserted := routineStore(&deps).Insert(created); !inserted {
                        writeError(w, http.StatusConflict, reasonDuplicate,
                                "Already Exists: Routine "+projectID+":"+datasetID+"."+routineID)
                        return
                }
                routineStore(&deps).Upsert(created)
                writeJSON(w, http.StatusOK, created)
        }
}

// RoutineUpdate implements `bigquery.routines.update`:
//
//        PUT /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines/{routineId}
//
// The routine must already exist (404 otherwise). The request body replaces
// the stored definition; the original creation time is kept and a fresh etag
// is minted. With the catalog enabled the routine is upserted there first,
// and a catalog error ends the request.
func RoutineUpdate(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, routineID := routineIDFromPath(r)
                previous, found := routineLookupExisting(r.Context(), &deps, projectID, datasetID, routineID)
                if !found {
                        writeError(w, http.StatusNotFound, reasonNotFound,
                                "Not found: Routine "+projectID+":"+datasetID+"."+routineID)
                        return
                }
                body, ok := decodeRoutineBody(w, r)
                if !ok {
                        return
                }
                updated := routineResource(projectID, datasetID, routineID, body)
                updated.CreationTime = previous.CreationTime
                updated.Etag = routines.MintEtag()
                if routineCatalogEnabled(&deps) {
                        if err := catalogUpsertRoutine(r.Context(), &deps, updated); err != nil {
                                // NOTE(review): the handler stops here whatever
                                // grpcToHTTPError returns; confirm it always writes a
                                // response for a non-nil error.
                                grpcToHTTPError(w, err)
                                return
                        }
                }
                routineStore(&deps).Upsert(updated)
                writeJSON(w, http.StatusOK, updated)
        }
}

// RoutineDelete implements `bigquery.routines.delete`:
//
//        DELETE /bigquery/v2/projects/{projectId}/datasets/{datasetId}/routines/{routineId}
//
// With the catalog enabled the routine is deleted there first and a catalog
// error ends the request; a miss in the in-memory store is then not treated
// as an error. Without the catalog, a miss in the in-memory store yields 404.
// Success responds with an empty JSON object.
func RoutineDelete(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, routineID := routineIDFromPath(r)
                catalogOn := routineCatalogEnabled(&deps)
                if catalogOn {
                        if err := catalogDeleteRoutine(r.Context(), &deps, projectID, datasetID, routineID); err != nil {
                                // NOTE(review): the handler stops here whatever
                                // grpcToHTTPError returns; confirm it always writes a
                                // response for a non-nil error.
                                grpcToHTTPError(w, err)
                                return
                        }
                }
                removed := routineStore(&deps).Delete(projectID, datasetID, routineID)
                if !removed && !catalogOn {
                        writeError(w, http.StatusNotFound, reasonNotFound,
                                "Not found: Routine "+projectID+":"+datasetID+"."+routineID)
                        return
                }
                writeJSON(w, http.StatusOK, struct{}{})
        }
}

package handlers

import (
        "context"
        "net/http"
        "slices"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/routines"
)

// routineCatalogEnabled reports whether deps has a catalog client configured
// (deps.Catalog is non-nil). The routine handlers use it to choose between
// catalog-backed and store-only behavior.
func routineCatalogEnabled(deps *Dependencies) bool {
        return deps.Catalog != nil
}

// routineRefProto builds the engine routine reference message for the given
// project, dataset, and routine IDs.
func routineRefProto(projectID, datasetID, routineID string) *enginepb.RoutineRef {
        ref := new(enginepb.RoutineRef)
        ref.ProjectId = projectID
        ref.DatasetId = datasetID
        ref.RoutineId = routineID
        return ref
}

// routineListKey formats a routine reference as "project:dataset.routine".
// mergeRoutineSources uses the result as its de-duplication and sort key.
func routineListKey(ref bqtypes.RoutineReference) string {
        dataset := ref.ProjectID + ":" + ref.DatasetID
        return dataset + "." + ref.RoutineID
}

// routineTypeFromFilter extracts the routine type from a list filter of the
// form "routineType:<TYPE>". Whitespace around <TYPE> is trimmed. It returns
// the empty string when filter does not start with the "routineType:" prefix.
func routineTypeFromFilter(filter string) string {
        rest, ok := strings.CutPrefix(filter, "routineType:")
        if !ok {
                return ""
        }
        return strings.TrimSpace(rest)
}

// overlayRoutineFromStore returns the catalog-backed routine with the
// gateway-store creation time, last-modified time, and etag copied over it.
// The engine catalog does not persist those fields. Empty stored values are
// not copied, and when ok is false the catalog routine is returned unchanged.
func overlayRoutineFromStore(catalog bqtypes.Routine, stored bqtypes.Routine, ok bool) bqtypes.Routine {
        merged := catalog
        if !ok {
                return merged
        }
        if created := stored.CreationTime; created != "" {
                merged.CreationTime = created
        }
        if modified := stored.LastModifiedTime; modified != "" {
                merged.LastModifiedTime = modified
        }
        if etag := stored.Etag; etag != "" {
                merged.Etag = etag
        }
        return merged
}

// ensureRoutineTimestamps fills missing timestamps on rt in place: an empty
// creation time becomes the current time, and an empty last-modified time
// becomes the (possibly just assigned) creation time.
func ensureRoutineTimestamps(rt *bqtypes.Routine) {
        if rt.CreationTime == "" {
                rt.CreationTime = nowMillis()
        }
        if rt.LastModifiedTime != "" {
                return
        }
        rt.LastModifiedTime = rt.CreationTime
}

// routineFromDescriptor converts an engine routine descriptor into the REST
// routine shape. The reference, type, language, and definition body come
// straight from the descriptor. When the descriptor carries DDL that
// routines.ParseCreateRoutineDDL can parse, the parsed fields are layered on
// top via applyRoutineFromDDL. A nil descriptor yields the zero routine.
func routineFromDescriptor(desc *enginepb.RoutineDescriptor) bqtypes.Routine {
        var rt bqtypes.Routine
        if desc == nil {
                return rt
        }
        ref := desc.GetRoutine()
        rt.RoutineReference = bqtypes.RoutineReference{
                ProjectID: ref.GetProjectId(),
                DatasetID: ref.GetDatasetId(),
                RoutineID: ref.GetRoutineId(),
        }
        rt.RoutineType = bqtypes.RoutineType(desc.GetRoutineType())
        rt.Language = bqtypes.RoutineLanguage(desc.GetLanguage())
        rt.DefinitionBody = desc.GetDefinitionBody()

        if ddl := desc.GetDdlSql(); ddl != "" {
                if parsed, ok := routines.ParseCreateRoutineDDL(ref.GetProjectId(), ref.GetDatasetId(), ddl); ok {
                        applyRoutineFromDDL(&rt, parsed)
                }
        }
        return rt
}

// applyRoutineFromDDL copies onto rt every field of parsed that is set:
// definition body, arguments, return type, routine type, language, and
// Python options. Fields that are empty or nil in parsed leave rt untouched.
func applyRoutineFromDDL(rt *bqtypes.Routine, parsed bqtypes.Routine) {
        if body := parsed.DefinitionBody; body != "" {
                rt.DefinitionBody = body
        }
        if args := parsed.Arguments; len(args) > 0 {
                rt.Arguments = args
        }
        if ret := parsed.ReturnType; ret != nil {
                rt.ReturnType = ret
        }
        if kind := parsed.RoutineType; kind != "" {
                rt.RoutineType = kind
        }
        if lang := parsed.Language; lang != "" {
                rt.Language = lang
        }
        if opts := parsed.PythonOptions; opts != nil {
                rt.PythonOptions = opts
        }
}

// catalogGetRoutine fetches one routine from the engine catalog and converts
// it with routineFromDescriptor. It returns false when the call fails or the
// response carries no routine; the underlying error is not reported.
func catalogGetRoutine(
        ctx context.Context,
        deps *Dependencies,
        projectID, datasetID, routineID string,
) (bqtypes.Routine, bool) {
        req := &enginepb.GetRoutineRequest{
                Routine: routineRefProto(projectID, datasetID, routineID),
        }
        resp, err := deps.Catalog.GetRoutine(ctx, req)
        if err != nil || resp == nil {
                return bqtypes.Routine{}, false
        }
        desc := resp.GetRoutine()
        if desc == nil {
                return bqtypes.Routine{}, false
        }
        return routineFromDescriptor(desc), true
}

// catalogListRoutines lists the routines of one dataset from the engine
// catalog, converting each descriptor with routineFromDescriptor. It returns
// nil when the call fails or yields no response; the underlying error is not
// reported.
func catalogListRoutines(ctx context.Context, deps *Dependencies, projectID, datasetID string) []bqtypes.Routine {
        req := &enginepb.ListRoutinesRequest{
                Dataset: &enginepb.DatasetRef{
                        ProjectId: projectID,
                        DatasetId: datasetID,
                },
        }
        resp, err := deps.Catalog.ListRoutines(ctx, req)
        if err != nil || resp == nil {
                return nil
        }
        descriptors := resp.GetRoutines()
        converted := make([]bqtypes.Routine, 0, len(descriptors))
        for _, desc := range descriptors {
                converted = append(converted, routineFromDescriptor(desc))
        }
        return converted
}

// mergeRoutineSources unions catalog and in-memory store entries when the
// catalog is enabled so DDL-registered routines appear in list even if the
// engine list lags, and store-only routines remain visible. With the catalog
// disabled it returns the store listing as is.
//
// Catalog entries win over store entries with the same key, but take their
// timestamps and etag from the store when present. When filter names a
// routine type, entries of any other type are dropped. The result is sorted
// by routineListKey.
func mergeRoutineSources(
        ctx context.Context,
        deps *Dependencies,
        projectID, datasetID, filter string,
) []bqtypes.Routine {
        store := routineStore(deps)
        stored := store.List(projectID, datasetID, filter)
        if !routineCatalogEnabled(deps) {
                return stored
        }
        typeFilter := routineTypeFromFilter(filter)
        cataloged := catalogListRoutines(ctx, deps, projectID, datasetID)

        capacity := len(cataloged) + len(stored)
        merged := make(map[string]bqtypes.Routine, capacity)
        keys := make([]string, 0, capacity)
        // include records rt unless it fails the type filter or its key has
        // already been recorded.
        include := func(rt bqtypes.Routine) {
                if typeFilter != "" && string(rt.RoutineType) != typeFilter {
                        return
                }
                key := routineListKey(rt.RoutineReference)
                if _, seen := merged[key]; seen {
                        return
                }
                ensureRoutineTimestamps(&rt)
                merged[key] = rt
                keys = append(keys, key)
        }

        for _, rt := range cataloged {
                ref := rt.RoutineReference
                fromStore, ok := store.Get(ref.ProjectID, ref.DatasetID, ref.RoutineID)
                include(overlayRoutineFromStore(rt, fromStore, ok))
        }
        // Store entries whose key was already recorded are skipped by include.
        for _, rt := range stored {
                include(rt)
        }

        slices.Sort(keys)
        out := make([]bqtypes.Routine, 0, len(keys))
        for _, key := range keys {
                out = append(out, merged[key])
        }
        return out
}

// routineLookupExisting finds a routine by ID. With the catalog enabled the
// catalog is consulted first and a hit is overlaid with the store's
// timestamps and etag; on a catalog miss, or with the catalog disabled, the
// in-memory store is used. Any routine returned has its timestamps filled by
// ensureRoutineTimestamps. The boolean reports whether a routine was found.
func routineLookupExisting(
        ctx context.Context,
        deps *Dependencies,
        projectID, datasetID, routineID string,
) (bqtypes.Routine, bool) {
        store := routineStore(deps)
        if routineCatalogEnabled(deps) {
                fromCatalog, hit := catalogGetRoutine(ctx, deps, projectID, datasetID, routineID)
                if hit {
                        stored, found := store.Get(projectID, datasetID, routineID)
                        merged := overlayRoutineFromStore(fromCatalog, stored, found)
                        ensureRoutineTimestamps(&merged)
                        return merged, true
                }
        }
        rt, ok := store.Get(projectID, datasetID, routineID)
        if !ok {
                return rt, false
        }
        ensureRoutineTimestamps(&rt)
        return rt, true
}

// catalogInsertRoutine persists a new routine via the catalog. It returns
// true when the HTTP response has already been written, which happens on a
// conflict with an existing catalog routine (409) or on an engine error. It
// returns false after a successful upsert, leaving the response to the
// caller.
func catalogInsertRoutine(
        ctx context.Context,
        w http.ResponseWriter,
        deps *Dependencies,
        projectID, datasetID, routineID string,
        out bqtypes.Routine,
) bool {
        _, exists := catalogGetRoutine(ctx, deps, projectID, datasetID, routineID)
        if exists {
                writeError(w, http.StatusConflict, reasonDuplicate,
                        "Already Exists: Routine "+projectID+":"+datasetID+"."+routineID)
                return true
        }
        err := catalogUpsertRoutine(ctx, deps, out)
        if err == nil {
                return false
        }
        grpcToHTTPError(w, err)
        return true
}

// catalogUpsertRoutine writes rt to the engine catalog as a routine
// descriptor carrying its reference, type, language, definition body, and
// the DDL produced by routines.BuildDDLFromRoutine. It returns the error from
// the catalog call, if any.
func catalogUpsertRoutine(ctx context.Context, deps *Dependencies, rt bqtypes.Routine) error {
        ddl := routines.BuildDDLFromRoutine(rt)
        ref := rt.RoutineReference
        descriptor := &enginepb.RoutineDescriptor{
                Routine:        routineRefProto(ref.ProjectID, ref.DatasetID, ref.RoutineID),
                RoutineType:    string(rt.RoutineType),
                Language:       string(rt.Language),
                DefinitionBody: rt.DefinitionBody,
                DdlSql:         ddl,
        }
        _, err := deps.Catalog.UpsertRoutine(ctx, &enginepb.UpsertRoutineRequest{Routine: descriptor})
        return err
}

// catalogDeleteRoutine asks the engine catalog to delete the routine with the
// given IDs and returns the error from that call, if any.
func catalogDeleteRoutine(ctx context.Context, deps *Dependencies, projectID, datasetID, routineID string) error {
        req := &enginepb.DeleteRoutineRequest{
                Routine: routineRefProto(projectID, datasetID, routineID),
        }
        _, err := deps.Catalog.DeleteRoutine(ctx, req)
        return err
}

// persistRoutineFromDDL registers a routine parsed from CREATE FUNCTION /
// PROCEDURE DDL in the in-memory store and mirrors it to the catalog when
// enabled so RoutineGet sees the same metadata as RoutineInsert.
//
// It returns the reference produced by routines.RegisterFromDDL, which is nil
// when nothing was registered. A failure to mirror the routine to the catalog
// does not change the result.
func persistRoutineFromDDL(
        ctx context.Context,
        deps *Dependencies,
        projectID, defaultDatasetID, sql string,
) *bqtypes.RoutineReference {
        store := routineStore(deps)
        ref := routines.RegisterFromDDL(store, projectID, defaultDatasetID, sql)
        if ref != nil && routineCatalogEnabled(deps) {
                if rt, ok := store.Get(ref.ProjectID, ref.DatasetID, ref.RoutineID); ok {
                        // NOTE(review): the upsert error is discarded, presumably as
                        // a deliberate best-effort mirror; confirm.
                        _ = catalogUpsertRoutine(ctx, deps, rt)
                }
        }
        return ref
}

package handlers

import (
        "context"
        "encoding/json"
        "net/http"
        "strconv"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

// rowAccessPolicyListKind is the `kind` field for a
// rowAccessPolicies.list response. See
// docs/bigquery/docs/reference/rest/v2/rowAccessPolicies/list.md.
const rowAccessPolicyListKind = "bigquery#listRowAccessPoliciesResponse"

// rowAccessPolicyKind is the `kind` field that policyToWire stamps on each
// individual row access policy resource.
const rowAccessPolicyKind = "bigquery#rowAccessPolicy"

// rowAccessPolicyReference is the JSON shape of the rowAccessPolicyReference
// object: the project, dataset, table, and policy IDs that together identify
// one row access policy.
type rowAccessPolicyReference struct {
        ProjectID string `json:"projectId"`
        DatasetID string `json:"datasetId"`
        TableID   string `json:"tableId"`
        PolicyID  string `json:"policyId"`
}

// rowAccessPolicyWire is the JSON representation of a row access policy used
// by the handlers in this file: policyToWire builds it for responses and
// decodeRowAccessPolicyBody decodes request bodies into it. The reference and
// filterPredicate are always serialized; the remaining fields are omitted
// when empty.
type rowAccessPolicyWire struct {
        Kind                     string                   `json:"kind,omitempty"`
        Etag                     string                   `json:"etag,omitempty"`
        RowAccessPolicyReference rowAccessPolicyReference `json:"rowAccessPolicyReference"`
        FilterPredicate          string                   `json:"filterPredicate"`
        CreationTime             string                   `json:"creationTime,omitempty"`
        LastModifiedTime         string                   `json:"lastModifiedTime,omitempty"`
        Grantees                 []string                 `json:"grantees,omitempty"`
}

func rowAccessPolicyPathValues(r *http.Request) (projectID, datasetID, tableID, policyID string) {
        return r.PathValue("projectId"), r.PathValue("datasetId"),
                r.PathValue("tableId"), r.PathValue("policyId")
}

// tableRef builds the engine table reference message for the given project,
// dataset, and table IDs.
func tableRef(projectID, datasetID, tableID string) *enginepb.TableRef {
        ref := new(enginepb.TableRef)
        ref.ProjectId = projectID
        ref.DatasetId = datasetID
        ref.TableId = tableID
        return ref
}

// msToTimestampString renders a positive millisecond count as its base-10
// string. Zero and negative values yield the empty string, which lets the
// `omitempty` timestamp fields of rowAccessPolicyWire drop out of the JSON.
func msToTimestampString(ms int64) string {
        if ms > 0 {
                return strconv.FormatInt(ms, 10)
        }
        return ""
}

// policyToWire converts an engine row access policy into its JSON wire form.
// The policy ID doubles as the etag, timestamps are rendered with
// msToTimestampString, and the grantees are copied into a new slice.
func policyToWire(p *enginepb.RowAccessPolicy) rowAccessPolicyWire {
        table := p.GetTable()
        var wire rowAccessPolicyWire
        wire.Kind = rowAccessPolicyKind
        wire.Etag = p.GetPolicyId()
        wire.RowAccessPolicyReference = rowAccessPolicyReference{
                ProjectID: table.GetProjectId(),
                DatasetID: table.GetDatasetId(),
                TableID:   table.GetTableId(),
                PolicyID:  p.GetPolicyId(),
        }
        wire.FilterPredicate = p.GetFilterPredicate()
        wire.CreationTime = msToTimestampString(p.GetCreationTimeMs())
        wire.LastModifiedTime = msToTimestampString(p.GetLastModifiedTimeMs())
        wire.Grantees = append([]string(nil), p.GetGrantees()...)
        return wire
}

// decodeRowAccessPolicyBody decodes the request body as a JSON row access
// policy. On a decode error it returns the zero value together with that
// error.
func decodeRowAccessPolicyBody(r *http.Request) (rowAccessPolicyWire, error) {
        var policy rowAccessPolicyWire
        err := json.NewDecoder(r.Body).Decode(&policy)
        if err != nil {
                return rowAccessPolicyWire{}, err
        }
        return policy, nil
}

// RowAccessPolicyList implements `bigquery.rowAccessPolicies.list`.
//
// Without a catalog it responds with an empty policy list. Otherwise it
// lists the table's policies through the catalog, converts each with
// policyToWire, and reports a catalog failure via writeGRPCError.
func RowAccessPolicyList(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, tableID, _ := rowAccessPolicyPathValues(r)
                if deps.Catalog == nil {
                        writeJSON(w, http.StatusOK, map[string]any{
                                resourceKeyKind:     rowAccessPolicyListKind,
                                "rowAccessPolicies": []any{},
                        })
                        return
                }
                req := &enginepb.ListRowAccessPoliciesRequest{
                        Table: tableRef(projectID, datasetID, tableID),
                }
                resp, err := deps.Catalog.ListRowAccessPolicies(r.Context(), req)
                if err != nil {
                        writeGRPCError(w, err)
                        return
                }
                source := resp.GetPolicies()
                wire := make([]rowAccessPolicyWire, 0, len(source))
                for i := range source {
                        wire = append(wire, policyToWire(source[i]))
                }
                writeJSON(w, http.StatusOK, map[string]any{
                        resourceKeyKind:     rowAccessPolicyListKind,
                        "rowAccessPolicies": wire,
                })
        }
}

// RowAccessPolicyInsert implements `bigquery.rowAccessPolicies.insert`.
//
// The policy ID is taken from the body's rowAccessPolicyReference and falls
// back to the `policyId` query parameter; a request with neither is rejected
// with 400. The policy is stored through the catalog's upsert call with
// creation and last-modified times both set to the current time. Without a
// catalog the handler answers NotImplemented.
func RowAccessPolicyInsert(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                if deps.Catalog == nil {
                        NotImplemented(w, r)
                        return
                }
                body, err := decodeRowAccessPolicyBody(r)
                if err != nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid, err.Error())
                        return
                }
                policyID := body.RowAccessPolicyReference.PolicyID
                if policyID == "" {
                        policyID = r.URL.Query().Get("policyId")
                }
                if policyID == "" {
                        writeError(w, http.StatusBadRequest, reasonInvalid, "policyId is required")
                        return
                }

                projectID, datasetID, tableID, _ := rowAccessPolicyPathValues(r)
                stamp := time.Now().UnixMilli()
                policy := &enginepb.RowAccessPolicy{
                        Table:              tableRef(projectID, datasetID, tableID),
                        PolicyId:           policyID,
                        FilterPredicate:    body.FilterPredicate,
                        Grantees:           body.Grantees,
                        CreationTimeMs:     stamp,
                        LastModifiedTimeMs: stamp,
                }
                resp, err := deps.Catalog.UpsertRowAccessPolicy(
                        r.Context(), &enginepb.UpsertRowAccessPolicyRequest{Policy: policy})
                if err != nil {
                        writeGRPCError(w, err)
                        return
                }
                writeJSON(w, http.StatusOK, policyToWire(resp.GetPolicy()))
        }
}

// RowAccessPolicyGet implements `bigquery.rowAccessPolicies.get`.
//
// The handler lists the table's policies through the catalog and returns
// the first one whose ID equals the `policyId` path value. It answers
// NotFound when no policy matches or when no catalog is configured.
func RowAccessPolicyGet(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                if deps.Catalog == nil {
                        NotFound(w, r)
                        return
                }
                projectID, datasetID, tableID, wantID := rowAccessPolicyPathValues(r)
                listReq := &enginepb.ListRowAccessPoliciesRequest{
                        Table: tableRef(projectID, datasetID, tableID),
                }
                resp, err := deps.Catalog.ListRowAccessPolicies(r.Context(), listReq)
                if err != nil {
                        writeGRPCError(w, err)
                        return
                }
                for _, candidate := range resp.GetPolicies() {
                        if candidate.GetPolicyId() != wantID {
                                continue
                        }
                        writeJSON(w, http.StatusOK, policyToWire(candidate))
                        return
                }
                NotFound(w, r)
        }
}

// RowAccessPolicyUpdate implements `bigquery.rowAccessPolicies.update`.
//
// The policy named by the `policyId` path value is written through the
// catalog's upsert call with the filter predicate and grantees from the
// request body and a fresh last-modified time. Without a catalog the
// handler answers NotImplemented.
//
// NOTE(review): CreationTimeMs is not populated here; whether the engine
// keeps the previously stored creation time on upsert is not visible from
// this handler — confirm.
func RowAccessPolicyUpdate(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                if deps.Catalog == nil {
                        NotImplemented(w, r)
                        return
                }
                body, err := decodeRowAccessPolicyBody(r)
                if err != nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid, err.Error())
                        return
                }

                projectID, datasetID, tableID, policyID := rowAccessPolicyPathValues(r)
                policy := &enginepb.RowAccessPolicy{
                        Table:              tableRef(projectID, datasetID, tableID),
                        PolicyId:           policyID,
                        FilterPredicate:    body.FilterPredicate,
                        Grantees:           body.Grantees,
                        LastModifiedTimeMs: time.Now().UnixMilli(),
                }
                resp, err := deps.Catalog.UpsertRowAccessPolicy(
                        r.Context(), &enginepb.UpsertRowAccessPolicyRequest{Policy: policy})
                if err != nil {
                        writeGRPCError(w, err)
                        return
                }
                writeJSON(w, http.StatusOK, policyToWire(resp.GetPolicy()))
        }
}

// RowAccessPolicyDelete implements `bigquery.rowAccessPolicies.delete`.
//
// On success the handler writes a bare 200 status with no body. Catalog
// errors are mapped through writeGRPCError, and a missing catalog yields
// NotImplemented.
func RowAccessPolicyDelete(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                if deps.Catalog == nil {
                        NotImplemented(w, r)
                        return
                }
                projectID, datasetID, tableID, policyID := rowAccessPolicyPathValues(r)
                delReq := &enginepb.DeleteRowAccessPolicyRequest{
                        Table:    tableRef(projectID, datasetID, tableID),
                        PolicyId: policyID,
                }
                if _, err := deps.Catalog.DeleteRowAccessPolicy(r.Context(), delReq); err != nil {
                        writeGRPCError(w, err)
                        return
                }
                w.WriteHeader(http.StatusOK)
        }
}

// RowAccessPolicyDispatch routes table-scoped rowAccessPolicies methods.
//
// With a non-empty `policyId` path value: GET -> get, PUT/PATCH -> update,
// DELETE -> delete, POST -> the IAM policy handler. Without one: GET -> list,
// POST -> insert. Any other method is answered with 405.
func RowAccessPolicyDispatch(deps Dependencies) http.HandlerFunc {
        list := RowAccessPolicyList(deps)
        insert := RowAccessPolicyInsert(deps)
        get := RowAccessPolicyGet(deps)
        update := RowAccessPolicyUpdate(deps)
        del := RowAccessPolicyDelete(deps)
        iam := RowAccessPolicyIamPolicy(deps)
        return func(w http.ResponseWriter, r *http.Request) {
                var target http.HandlerFunc
                if r.PathValue("policyId") != "" {
                        switch r.Method {
                        case http.MethodGet:
                                target = get
                        case http.MethodPut, http.MethodPatch:
                                target = update
                        case http.MethodDelete:
                                target = del
                        case http.MethodPost:
                                target = iam
                        }
                } else {
                        switch r.Method {
                        case http.MethodGet:
                                target = list
                        case http.MethodPost:
                                target = insert
                        }
                }
                if target == nil {
                        writeError(w, http.StatusMethodNotAllowed, reasonInvalid,
                                "HTTP method not supported for this rowAccessPolicies endpoint")
                        return
                }
                target(w, r)
        }
}

// writeGRPCError translates an engine error into a REST error response.
//
// Errors that do not carry a gRPC status become 500. For status errors the
// code selects the response: NotFound -> 404, InvalidArgument -> 400,
// PermissionDenied -> 403, Unimplemented -> NotImplemented, and everything
// else -> 500 with the status message.
func writeGRPCError(w http.ResponseWriter, err error) {
        st, isStatus := status.FromError(err)
        if !isStatus {
                writeError(w, http.StatusInternalServerError, reasonInternalError, err.Error())
                return
        }

        httpStatus, reason := http.StatusInternalServerError, reasonInternalError
        switch st.Code() {
        case codes.Unimplemented:
                // No request is in scope here, so nil is passed through.
                // NOTE(review): confirm NotImplemented tolerates a nil request.
                NotImplemented(w, nil)
                return
        case codes.NotFound:
                httpStatus, reason = http.StatusNotFound, reasonNotFound
        case codes.InvalidArgument:
                httpStatus, reason = http.StatusBadRequest, reasonInvalid
        case codes.PermissionDenied:
                httpStatus, reason = http.StatusForbidden, reasonAccessDenied
        }
        writeError(w, httpStatus, reason, st.Message())
}

// SyncColumnGovernanceFromSchema persists policy tags from a REST schema
// patch into the engine catalog for query-time masking.
//
// Each field that has an explicit MaskKind, or at least one policy tag
// name (which defaults the mask kind to "SHA256"), is sent to the catalog
// via SetColumnGovernance. Fields with neither are skipped. The call is a
// no-op when the catalog or the schema is nil.
func SyncColumnGovernanceFromSchema(
        ctx context.Context,
        deps Dependencies,
        projectID, datasetID, tableID string,
        schema *bqtypes.TableSchema,
) {
        if schema == nil || deps.Catalog == nil {
                return
        }
        for _, field := range schema.Fields {
                var tags []string
                if field.PolicyTags != nil {
                        tags = append([]string(nil), field.PolicyTags.Names...)
                }
                maskKind := field.MaskKind
                if maskKind == "" {
                        if len(tags) == 0 {
                                continue
                        }
                        maskKind = "SHA256"
                }
                setReq := &enginepb.SetColumnGovernanceRequest{
                        Table: tableRef(projectID, datasetID, tableID),
                        Column: &enginepb.ColumnGovernance{
                                ColumnName: field.Name,
                                MaskKind:   maskKind,
                                PolicyTags: tags,
                        },
                }
                // The result and error are discarded: the function has no
                // error return, so a failed sync is not reported to the caller.
                _, _ = deps.Catalog.SetColumnGovernance(ctx, setReq)
        }
}

// RowAccessPolicyIamPolicy returns a handler that answers every request
// with NotImplemented; the dependencies argument is unused.
func RowAccessPolicyIamPolicy(_ Dependencies) http.HandlerFunc {
        notImplemented := func(w http.ResponseWriter, r *http.Request) {
                NotImplemented(w, r)
        }
        return notImplemented
}

package handlers

import (
        "context"
        "fmt"
        "net/http"
        "regexp"
        "strconv"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/middleware"
        "github.com/vantaboard/bigquery-emulator/gateway/query"
)

// stripBlockComments removes every `/* ... */` block comment from sql and
// returns the remaining text. Nothing is inserted in place of a removed
// comment, so the text on either side of it is joined directly.
//
// An unterminated comment swallows the rest of the input. The scan does
// not recognize string literals, so a `/*` inside a quoted string is
// treated as a comment opener as well.
func stripBlockComments(sql string) string {
        var out strings.Builder
        out.Grow(len(sql))
        rest := sql
        for {
                open := strings.Index(rest, "/*")
                if open < 0 {
                        out.WriteString(rest)
                        break
                }
                out.WriteString(rest[:open])
                body := rest[open+2:]
                end := strings.Index(body, "*/")
                if end < 0 {
                        // Unterminated comment: drop everything after the opener
                        // instead of leaking its trailing byte into the output.
                        break
                }
                rest = body[end+2:]
        }
        return out.String()
}

// trimLeadingSQLComments drops every line whose first non-blank characters
// are `--` and returns the remaining lines joined with newlines and
// trimmed of surrounding whitespace. Despite the name, comment lines are
// removed wherever they occur, not only at the start.
func trimLeadingSQLComments(sql string) string {
        var kept strings.Builder
        wrote := false
        for rest, more := sql, true; more; {
                var line string
                line, rest, more = strings.Cut(rest, "\n")
                if strings.HasPrefix(strings.TrimSpace(line), "--") {
                        continue
                }
                if wrote {
                        kept.WriteByte('\n')
                }
                kept.WriteString(line)
                wrote = true
        }
        return strings.TrimSpace(kept.String())
}

func sqlForScriptDetection(sql string) string {
        return trimLeadingSQLComments(stripBlockComments(sql))
}

var (
        // setKeywordRE matches SET as a whole word, case-insensitively.
        setKeywordRE = regexp.MustCompile(`(?i)\bSET\b`)

        // beginEndBlockRE matches input that is a single BEGIN ... END block
        // (optionally followed by a semicolon) and captures the block body.
        beginEndBlockRE = regexp.MustCompile(`(?is)^\s*BEGIN\s+(.*)\s+END\s*;?\s*$`)
)

// unwrapBeginEndBlock returns the trimmed body of sql when the whole input
// is one BEGIN ... END block. Any other input is returned unchanged,
// including its surrounding whitespace.
func unwrapBeginEndBlock(sql string) string {
        groups := beginEndBlockRE.FindStringSubmatch(strings.TrimSpace(sql))
        if groups == nil {
                return sql
        }
        return strings.TrimSpace(groups[1])
}

// declareOrCallKeywordRE matches DECLARE or CALL as a whole keyword followed
// by a space. It is applied to upper-cased SQL. The leading word boundary
// keeps identifiers that merely end in those letters (for example a column
// named `recall`) from being mistaken for script keywords.
var declareOrCallKeywordRE = regexp.MustCompile(`\b(?:DECLARE|CALL) `)

// isMultiStatementScript reports whether sql is a DECLARE/SET/CALL script
// that the gateway routes to its script execution path.
//
// Statements that start with CREATE, DROP or ALTER are never scripts. For
// everything else a surrounding BEGIN ... END block is unwrapped, comments
// are removed, and the text counts as a script when it contains a DECLARE
// or CALL keyword, or when it has at least two semicolons and a SET
// keyword. Keywords inside string literals are still matched.
func isMultiStatementScript(sql string) bool {
        trimmed := strings.TrimSpace(sql)
        head := strings.ToUpper(sqlForScriptDetection(trimmed))
        // DDL setup statements (CREATE PROCEDURE bodies embed BEGIN/SET) must
        // not enter the script splitter.
        for _, ddl := range []string{"CREATE ", "DROP ", "ALTER "} {
                if strings.HasPrefix(head, ddl) {
                        return false
                }
        }
        detected := sqlForScriptDetection(unwrapBeginEndBlock(trimmed))
        upper := strings.ToUpper(detected)
        if declareOrCallKeywordRE.MatchString(upper) {
                return true
        }
        return strings.Count(detected, ";") >= 2 && setKeywordRE.MatchString(upper)
}

// engineScriptKeywordRE matches DECLARE or CALL as a whole keyword followed
// by a space. It is applied to upper-cased SQL. The leading word boundary
// keeps identifiers such as `recall` from matching.
var engineScriptKeywordRE = regexp.MustCompile(`\b(?:DECLARE|CALL) `)

// needsEngineScriptExecution reports whether the script must run as one
// engine round-trip so DECLARE/CALL variable scope survives (the engine's
// ExecuteMultiStmtScript path). Legacy SET+UNNEST substitution scripts
// without DECLARE/CALL stay on the per-statement split path.
//
// The check runs on the upper-cased SQL with `--` comment lines removed:
// it is true when the text starts with BEGIN or contains a DECLARE or
// CALL keyword.
func needsEngineScriptExecution(sql string) bool {
        upper := strings.ToUpper(trimLeadingSQLComments(strings.TrimSpace(sql)))
        return strings.HasPrefix(upper, "BEGIN") ||
                engineScriptKeywordRE.MatchString(upper)
}

// splitScriptStatements splits script SQL into statements on semicolons.
//
// A semicolon only separates statements when it is outside of:
//   - single- or double-quoted strings, including their triple-quoted forms,
//   - backtick-quoted identifiers,
//   - `--` line comments and `/* ... */` block comments.
//
// Inside a quoted literal a backslash escapes the following byte, so `\'`
// does not end the literal. Each statement is trimmed; empty statements and
// statements made only of `--` comment lines are dropped. An unterminated
// literal or block comment extends to the end of the input.
func splitScriptStatements(sql string) []string {
        var out []string
        var b strings.Builder
        flush := func() {
                stmt := strings.TrimSpace(b.String())
                if stmt != "" && !isCommentOnlyStatement(stmt) {
                        out = append(out, stmt)
                }
                b.Reset()
        }
        for i := 0; i < len(sql); {
                c := sql[i]
                switch {
                case c == '\'' || c == '"' || c == '`':
                        end := scanQuotedLiteral(sql, i)
                        b.WriteString(sql[i:end])
                        i = end
                case strings.HasPrefix(sql[i:], "--"):
                        // Copy the comment verbatim up to (not including) the newline
                        // so quotes and semicolons inside it are not interpreted.
                        end := len(sql)
                        if nl := strings.IndexByte(sql[i:], '\n'); nl >= 0 {
                                end = i + nl
                        }
                        b.WriteString(sql[i:end])
                        i = end
                case strings.HasPrefix(sql[i:], "/*"):
                        end := len(sql)
                        if closeAt := strings.Index(sql[i+2:], "*/"); closeAt >= 0 {
                                end = i + 2 + closeAt + 2
                        }
                        b.WriteString(sql[i:end])
                        i = end
                case c == ';':
                        flush()
                        i++
                default:
                        b.WriteByte(c)
                        i++
                }
        }
        flush()
        return out
}

// scanQuotedLiteral returns the index just past the quoted literal that
// starts at sql[start], where sql[start] is ', " or `. Triple-quoted
// strings end only at a matching triple quote. An unterminated literal
// ends at len(sql).
func scanQuotedLiteral(sql string, start int) int {
        delim := sql[start : start+1]
        if delim != "`" && strings.HasPrefix(sql[start:], strings.Repeat(delim, 3)) {
                delim = sql[start : start+3]
        }
        for i := start + len(delim); i < len(sql); {
                switch {
                case sql[i] == '\\':
                        i += 2 // skip the escaped byte
                case strings.HasPrefix(sql[i:], delim):
                        return i + len(delim)
                default:
                        i++
                }
        }
        return len(sql)
}

// isCommentOnlyStatement reports whether every non-blank line of s starts
// with `--`. An empty or all-blank s also reports true.
func isCommentOnlyStatement(s string) bool {
        for _, line := range strings.Split(s, "\n") {
                t := strings.TrimSpace(line)
                if t == "" {
                        continue
                }
                if !strings.HasPrefix(t, "--") {
                        return false
                }
        }
        return true
}

// scriptStmtKind classifies one statement of a script.
type scriptStmtKind int

const (
        // scriptStmtDeclare is a statement starting with "DECLARE ".
        scriptStmtDeclare scriptStmtKind = iota
        // scriptStmtSet is a statement starting with "SET ".
        scriptStmtSet
        // scriptStmtCall is a statement starting with "CALL ".
        scriptStmtCall
        // scriptStmtQuery is any statement that is none of the above.
        scriptStmtQuery
)

// scriptStatement is the result of classifying one script statement.
type scriptStatement struct {
        // kind is the statement category.
        kind scriptStmtKind
        // sql is the statement text with `--` comment lines removed; for SET
        // statements it is the text after the first "=" (see
        // classifyScriptStatement).
        sql string
        // name is the variable name for DECLARE and SET statements and is
        // empty for the other kinds.
        name string
}

// classifyScriptStatement removes `--` comment lines from sql and sorts the
// statement into DECLARE, SET, CALL or query by its upper-cased prefix.
//
// For DECLARE the name is the first space- or tab-delimited token after the
// keyword. For SET the name and body come from parseSetStatement, and the
// body (not the whole statement) is stored in the sql field.
func classifyScriptStatement(sql string) scriptStatement {
        stmt := trimLeadingSQLComments(sql)
        upper := strings.ToUpper(stmt)

        if strings.HasPrefix(upper, "DECLARE ") {
                rest := strings.TrimSpace(stmt[len("DECLARE "):])
                name := rest
                if sp := strings.IndexAny(rest, " \t"); sp > 0 {
                        name = rest[:sp]
                }
                return scriptStatement{kind: scriptStmtDeclare, sql: stmt, name: name}
        }
        if strings.HasPrefix(upper, "SET ") {
                name, body := parseSetStatement(stmt)
                return scriptStatement{kind: scriptStmtSet, sql: body, name: name}
        }

        kind := scriptStmtQuery
        if strings.HasPrefix(upper, "CALL ") {
                kind = scriptStmtCall
        }
        return scriptStatement{kind: kind, sql: stmt}
}

// parseSetStatement splits a `SET name = expr` statement at its first "=".
//
// It returns the trimmed variable name and the trimmed expression with one
// trailing semicolon removed. When sql does not start with "SET "
// (case-insensitive) or contains no "=", it returns an empty name and sql
// unchanged.
func parseSetStatement(sql string) (name, body string) {
        const keyword = "SET "
        // Guard the slice below: callers normally pass a SET statement, but a
        // shorter or unrelated input must not panic or be mis-parsed.
        if len(sql) < len(keyword) || !strings.EqualFold(sql[:len(keyword)], keyword) {
                return "", sql
        }
        rest := strings.TrimSpace(sql[len(keyword):])
        before, after, ok := strings.Cut(rest, "=")
        if !ok {
                return "", sql
        }
        name = strings.TrimSpace(before)
        body = strings.TrimSuffix(strings.TrimSpace(after), ";")
        return name, body
}

// substituteScriptVars replaces `UNNEST(name)` and UNNEST(`name`) in sql
// with an inline array literal of the variable's string values.
//
// Variables with no values are left untouched. Values are emitted as
// single-quoted GoogleSQL string literals using backslash escapes for
// backslash, single quote, newline and carriage return; doubling the quote
// (`''`) is not an escape in GoogleSQL. Matching is exact: the keyword must
// be upper-case with no whitespace inside the parentheses.
func substituteScriptVars(sql string, vars map[string][]string) string {
        escaper := strings.NewReplacer(
                `\`, `\\`,
                `'`, `\'`,
                "\n", `\n`,
                "\r", `\r`,
        )
        out := sql
        for name, vals := range vars {
                if len(vals) == 0 {
                        continue
                }
                quoted := make([]string, len(vals))
                for i, s := range vals {
                        quoted[i] = "'" + escaper.Replace(s) + "'"
                }
                literal := "UNNEST([" + strings.Join(quoted, ", ") + "])"
                out = strings.ReplaceAll(out, "UNNEST("+name+")", literal)
                out = strings.ReplaceAll(out, "UNNEST(`"+name+"`)", literal)
        }
        return out
}

// arrayFromRow extracts the string elements of a single-row, single-column
// result whose only cell holds a []bqtypes.Cell. It returns nil for any
// other result shape; elements whose value is not a string are skipped.
func arrayFromRow(rows []bqtypes.Row) []string {
        if len(rows) != 1 {
                return nil
        }
        cells := rows[0].F
        if len(cells) != 1 {
                return nil
        }
        elems, isArray := cells[0].V.([]bqtypes.Cell)
        if !isArray {
                return nil
        }
        values := make([]string, 0, len(elems))
        for i := range elems {
                s, isString := elems[i].V.(string)
                if !isString {
                        continue
                }
                values = append(values, s)
        }
        return values
}

// executeScriptStatement runs one script statement on the engine and drains
// the result stream.
//
// The SQL first goes through query.PrepareEngineSQL (which receives
// useLegacy); the engine request itself is always sent with UseLegacySql
// set to false. It returns the result schema, rows, statement type and
// emulator route, or the first error from preparation, execution or
// stream draining.
func executeScriptStatement(
        ctx context.Context,
        deps Dependencies,
        projectID, defaultDataset, sql string,
        useLegacy bool,
) (*enginepb.TableSchema, []bqtypes.Row, string, string, error) {
        prepared, err := query.PrepareEngineSQL(useLegacy, sql, projectID, defaultDataset)
        if err != nil {
                return nil, nil, "", "", err
        }
        stream, err := deps.Query.ExecuteQuery(ctx, &enginepb.QueryRequest{
                ProjectId:        projectID,
                DefaultDatasetId: defaultDataset,
                Sql:              prepared,
                UseLegacySql:     false,
        })
        if err != nil {
                return nil, nil, "", "", err
        }
        schema, _, rows, statementType, emulatorRoute, _, drainErr := drainSyncStream(stream)
        if drainErr != nil {
                return nil, nil, "", "", drainErr
        }
        return schema, rows, statementType, emulatorRoute, nil
}

// stampChildJobParent records parentID as the parent of job, both on the
// job itself and in its statistics.
func stampChildJobParent(job *jobs.Job, parentID string) {
        job.Statistics.ParentJobID = parentID
        job.ParentJobID = parentID
}

// scriptExecOutcome summarizes a finished script run: how many child jobs
// were registered and the result that finalizeScriptParentJob copies onto
// the parent job.
type scriptExecOutcome struct {
        // childCount is the number of child jobs registered for the script.
        childCount int
        // finalSchema is the schema of the script's final result, if any.
        finalSchema *enginepb.TableSchema
        // finalRows are the rows of the script's final result, if any.
        finalRows []bqtypes.Row
        // finalStmtType is the statement type reported for the final result.
        finalStmtType string
        // finalRoute is the emulator route reported for the final result.
        finalRoute string
}

// declareToCreateConstant lowers DECLARE to CREATE CONSTANT for the engine's
// AnalyzeNextStatement script loop (DECLARE is script-only parse syntax).
//
// Supported forms:
//
//	DECLARE name TYPE              -> CREATE CONSTANT name = CAST(NULL AS TYPE)
//	DECLARE name TYPE DEFAULT expr -> CREATE CONSTANT name = expr
//	DECLARE name DEFAULT expr      -> CREATE CONSTANT name = expr
//
// Statements that are not DECLARE, or a DECLARE with neither a type nor a
// default, are returned with `--` comment lines removed but otherwise
// unchanged. The DEFAULT keyword is only recognized when it is surrounded
// by single spaces.
func declareToCreateConstant(stmt string) string {
        trim := trimLeadingSQLComments(strings.TrimSpace(stmt))
        if !strings.HasPrefix(strings.ToUpper(trim), "DECLARE ") {
                return trim
        }
        rest := strings.TrimSpace(trim[len("DECLARE "):])
        rest = strings.TrimSuffix(rest, ";")
        defaultPart := ""
        if idx := strings.Index(strings.ToUpper(rest), " DEFAULT "); idx >= 0 {
                defaultPart = strings.TrimSpace(rest[idx+len(" DEFAULT "):])
                rest = strings.TrimSpace(rest[:idx])
        }
        if defaultPart != "" {
                // The type is optional when a default is given, so the name is
                // whatever precedes the first space (or all of rest).
                name, _, _ := strings.Cut(rest, " ")
                name = strings.TrimSpace(name)
                if name == "" {
                        return trim
                }
                return fmt.Sprintf("CREATE CONSTANT %s = %s", name, defaultPart)
        }
        name, typeName, ok := strings.Cut(rest, " ")
        if !ok {
                return trim
        }
        return fmt.Sprintf("CREATE CONSTANT %s = CAST(NULL AS %s)",
                strings.TrimSpace(name), strings.TrimSpace(typeName))
}

// transformScriptDeclares rewrites a script so each DECLARE statement
// becomes a CREATE CONSTANT (see declareToCreateConstant) and joins the
// statements back with ";\n".
//
// Scripts for which scriptNeedsGoogleSQLExecutor reports true are returned
// trimmed but otherwise untouched. A script that yields no statements is
// returned as its unwrapped BEGIN ... END body.
func transformScriptDeclares(sql string) string {
        body := unwrapBeginEndBlock(sql)
        // Control-flow scripts must reach googlesql::ScriptExecutor with DECLARE
        // syntax and intact IF/WHILE bodies. Per-statement splitting breaks
        // semicolons inside THEN/ELSE branches.
        if scriptNeedsGoogleSQLExecutor(body) {
                return strings.TrimSpace(sql)
        }
        stmts := splitScriptStatements(body)
        if len(stmts) == 0 {
                return body
        }
        for i, stmt := range stmts {
                stmts[i] = declareToCreateConstant(stmt)
        }
        return strings.Join(stmts, ";\n")
}

// runLegacySplitScript executes a script statement by statement.
//
// The script is unwrapped from any BEGIN ... END block and split with
// splitScriptStatements. DECLARE statements only register the variable
// name. Every SET, CALL and query statement is run through
// executeScriptStatement and registered as a child job of parent:
//   - query statements have UNNEST(var) references replaced from vars first,
//     and the last query's result becomes the script's final result;
//   - a SET whose result is a single string array stores that array in vars.
//
// The first failing statement aborts the script: its child job is finalized
// as failed and the error is returned with a nil outcome.
func runLegacySplitScript(
        ctx context.Context,
        deps Dependencies,
        r *http.Request,
        projectID string,
        parent *jobs.Job,
        posted *jobs.Job,
        cfg *jobs.JobConfiguration,
        sql string,
        defaultDataset string,
        useLegacy bool,
) (*scriptExecOutcome, error) {
        vars := make(map[string][]string)
        out := &scriptExecOutcome{}
        for _, raw := range splitScriptStatements(unwrapBeginEndBlock(sql)) {
                st := classifyScriptStatement(raw)
                switch st.kind {
                case scriptStmtDeclare:
                        vars[st.name] = nil
                        continue
                case scriptStmtCall, scriptStmtSet, scriptStmtQuery:
                        stmtSQL := st.sql
                        if st.kind == scriptStmtQuery {
                                stmtSQL = substituteScriptVars(stmtSQL, vars)
                        }
                        // Clone the posted job and configuration so the child gets
                        // its own job ID and carries only this statement's SQL.
                        childPosted := *posted
                        childPosted.JobReference.JobID = ""
                        childCfg := *cfg
                        qCopy := *cfg.Query
                        qCopy.Query = stmtSQL
                        childCfg.Query = &qCopy
                        child := newPendingJob(deps, projectID, &childPosted, &childCfg)
                        stampChildJobParent(child, parent.JobReference.JobID)
                        childStart := time.Now().UTC()
                        schema, rows, statementType, emulatorRoute, err := executeScriptStatement(
                                ctx, deps, projectID, defaultDataset, stmtSQL, useLegacy)
                        if err != nil {
                                // Finalize the child as failed, as
                                // registerReExecutedEngineScriptChild does, rather than
                                // returning without touching it.
                                finalizeFailedJob(deps, child, childStart, err)
                                stampChildJobParent(child, parent.JobReference.JobID)
                                return nil, err
                        }
                        if st.kind == scriptStmtSet && st.name != "" {
                                if arr := arrayFromRow(rows); len(arr) > 0 {
                                        vars[st.name] = arr
                                }
                        }
                        childEnd := time.Now().UTC()
                        finalizeDoneJob(deps, child, childStart, childEnd,
                                schema, nil, rows, statementType, emulatorRoute, nil, nil, nil, r)
                        stampChildJobParent(child, parent.JobReference.JobID)
                        out.childCount++
                        if st.kind == scriptStmtQuery {
                                out.finalSchema = schema
                                out.finalRows = rows
                                out.finalStmtType = statementType
                                out.finalRoute = emulatorRoute
                        }
                }
        }
        return out, nil
}

// runScriptStatements picks the execution strategy for a script: scripts
// for which needsEngineScriptExecution reports true go to runEngineScript,
// all others to runLegacySplitScript.
func runScriptStatements(
        ctx context.Context,
        deps Dependencies,
        r *http.Request,
        projectID string,
        parent *jobs.Job,
        posted *jobs.Job,
        cfg *jobs.JobConfiguration,
        sql string,
        defaultDataset string,
        useLegacy bool,
) (*scriptExecOutcome, error) {
        if !needsEngineScriptExecution(sql) {
                return runLegacySplitScript(
                        ctx, deps, r, projectID, parent, posted, cfg,
                        sql, defaultDataset, useLegacy)
        }
        return runEngineScript(
                ctx, deps, r, projectID, parent, posted, cfg,
                defaultDataset, sql, useLegacy)
}

// finalizeScriptParentJob marks the script's parent job as done, records
// its start/end times and child-job count, and — when the outcome carries
// rows or a schema — attaches the final result. The query statistics are
// only set when the final statement type is non-empty.
func finalizeScriptParentJob(
        parent *jobs.Job,
        parentStart, parentEnd time.Time,
        out *scriptExecOutcome,
) {
        parent.Status.State = jobs.JobStateDone

        stats := &parent.Statistics
        stats.StartTime = millisString(parentStart)
        stats.EndTime = millisString(parentEnd)
        stats.NumChildJobs = strconv.Itoa(out.childCount)

        if out.finalRows == nil && out.finalSchema == nil {
                return
        }
        parent.Result = &jobs.QueryResult{
                Schema:        schemaFromProto(out.finalSchema),
                Rows:          out.finalRows,
                StatementType: out.finalStmtType,
                EmulatorRoute: out.finalRoute,
        }
        if out.finalStmtType == "" {
                return
        }
        stats.Query = &bqtypes.JobStatistics2{StatementType: out.finalStmtType}
}

// runSyncScriptQueryInsert executes DECLARE/SET/SELECT scripts and
// registers a parent job plus per-statement child jobs.
//
// On success the finalized parent job is written as the 200 response. On
// failure the parent is finalized as failed and the error is reported via
// queryGRPCToHTTPError, falling back to a 400 invalid-query error.
func runSyncScriptQueryInsert(
        deps Dependencies,
        w http.ResponseWriter,
        r *http.Request,
        posted *jobs.Job,
        cfg *jobs.JobConfiguration,
) {
        projectID := r.PathValue("projectId")
        parent := newPendingJob(deps, projectID, posted, cfg)
        parentStart := time.Now().UTC()

        queryCfg := cfg.Query
        useLegacy := queryCfg.UseLegacySQL != nil && *queryCfg.UseLegacySQL
        defaultDataset := resolveDefaultDataset(deps, queryCfg.DefaultDataset)

        out, err := runScriptStatements(
                r.Context(), deps, r, projectID, parent, posted, cfg,
                queryCfg.Query, defaultDataset, useLegacy)
        if err != nil {
                finalizeFailedJob(deps, parent, parentStart, err)
                if !queryGRPCToHTTPError(w, err) {
                        writeError(w, http.StatusBadRequest, reasonInvalidQuery, err.Error())
                }
                return
        }
        finalizeScriptParentJob(parent, parentStart, time.Now().UTC(), out)
        writeJSON(w, http.StatusOK, parent)
}

// runQueryScriptExecute handles the jobs.query path for multi-statement
// scripts (client.query uses jobs.query when the request body is simple).
//
// It synthesizes a parent job and query configuration from the request,
// runs the script, and answers with a query response built from the
// script's final result. The emulator route is only included in the
// response when middleware.IsLoopback reports true for the request
// context. Failures finalize the parent as failed and are reported via
// queryGRPCToHTTPError, falling back to a 400 invalid-query error.
func runQueryScriptExecute(
        deps Dependencies,
        w http.ResponseWriter,
        r *http.Request,
        req *bqtypes.QueryRequest,
        defaultDataset string,
) {
        projectID := r.PathValue("projectId")
        parentStart := time.Now().UTC()
        posted := &jobs.Job{JobReference: bqtypes.JobReference{
                ProjectID: projectID,
                Location:  req.Location,
        }}
        cfg := &jobs.JobConfiguration{
                JobType: jobConfigurationKindQuery,
                Query:   &jobs.JobConfigurationQuery{Query: req.Query},
        }
        parent := newPendingJob(deps, projectID, posted, cfg)

        useLegacy := false
        if req.UseLegacySQL != nil {
                useLegacy = *req.UseLegacySQL
        }
        out, err := runScriptStatements(
                r.Context(), deps, r, projectID, parent, posted, cfg,
                req.Query, defaultDataset, useLegacy)
        if err != nil {
                finalizeFailedJob(deps, parent, parentStart, err)
                if !queryGRPCToHTTPError(w, err) {
                        writeError(w, http.StatusBadRequest, reasonInvalidQuery, err.Error())
                }
                return
        }

        finalizeScriptParentJob(parent, parentStart, time.Now().UTC(), out)
        restSchema := schemaFromProto(out.finalSchema)
        var visibleRoute string
        if middleware.IsLoopback(r.Context()) {
                visibleRoute = out.finalRoute
        }
        sessionInfo := sessionStore(&deps).Resolve(
                projectID, req.Location, req.CreateSession, req.ConnProperties)
        stampJobSessionInfo(parent, sessionInfo)
        writeJSON(w, http.StatusOK, assembleQueryResponse(
                parent, restSchema, out.finalRows, nil, nil,
                out.finalStmtType, visibleRoute, nil, nil, sessionInfo))
}

package handlers

import (
        "context"
        "net/http"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
)

// engineScriptFinalResult carries the final result of an engine-executed
// script so it can be attached to a child job without running the
// statement again (see registerEngineScriptChildJobs).
type engineScriptFinalResult struct {
        // schema is the result schema; it may be nil.
        schema *enginepb.TableSchema
        // rows are the result rows.
        rows []bqtypes.Row
        // statementType is the statement type recorded on the child job.
        statementType string
        // emulatorRoute is the emulator route recorded on the child job.
        emulatorRoute string
}

// registerEngineScriptChildJobs registers child jobs for a script that was
// executed by the engine and returns how many were registered.
//
// When scriptNeedsGoogleSQLExecutor reports true for the unwrapped script,
// at most one child is registered: the final result, if it has a schema or
// rows. Otherwise the script is split into statements: DECLARE and CALL
// produce no child, the last query statement is registered from final, and
// every SET and earlier query statement is executed again to produce its
// own child job.
func registerEngineScriptChildJobs(
        ctx context.Context,
        deps Dependencies,
        r *http.Request,
        projectID, defaultDataset string,
        useLegacy bool,
        parent *jobs.Job,
        posted *jobs.Job,
        cfg *jobs.JobConfiguration,
        sql string,
        final engineScriptFinalResult,
) int {
        inner := unwrapBeginEndBlock(sql)
        if scriptNeedsGoogleSQLExecutor(inner) {
                if final.schema == nil && len(final.rows) == 0 {
                        return 0
                }
                registerFinalSelectChildJob(
                        deps, r, projectID, parent, posted, cfg, final)
                return 1
        }

        // Classify each statement once and remember where the last query is.
        raws := splitScriptStatements(inner)
        classified := make([]scriptStatement, len(raws))
        lastQueryIdx := -1
        for i, raw := range raws {
                classified[i] = classifyScriptStatement(raw)
                if classified[i].kind == scriptStmtQuery {
                        lastQueryIdx = i
                }
        }

        registered := 0
        for i, st := range classified {
                switch {
                case st.kind == scriptStmtQuery && i == lastQueryIdx:
                        registerFinalSelectChildJob(
                                deps, r, projectID, parent, posted, cfg, final)
                case st.kind == scriptStmtQuery || st.kind == scriptStmtSet:
                        registerReExecutedEngineScriptChild(
                                ctx, deps, r, projectID, defaultDataset, useLegacy,
                                parent, posted, cfg, st.sql)
                default:
                        // DECLARE and CALL statements get no child job.
                        continue
                }
                registered++
        }
        return registered
}

// registerReExecutedEngineScriptChild records one script statement as a child
// job of parent by executing that statement again on its own.
//
// The child is created from shallow copies of posted and cfg: the copied job
// reference has its JobID cleared and the copied query carries stmtSQL as its
// text. The statement is run through executeScriptStatement and the child is
// finalized as failed or done depending on the result. The parent link is
// stamped right after the child is created and once more after finalization.
func registerReExecutedEngineScriptChild(
        ctx context.Context,
        deps Dependencies,
        r *http.Request,
        projectID, defaultDataset string,
        useLegacy bool,
        parent *jobs.Job,
        posted *jobs.Job,
        cfg *jobs.JobConfiguration,
        stmtSQL string,
) {
        stmtPosted := *posted
        stmtPosted.JobReference.JobID = ""

        stmtCfg := *cfg
        stmtQuery := *cfg.Query
        stmtQuery.Query = stmtSQL
        stmtCfg.Query = &stmtQuery

        child := newPendingJob(deps, projectID, &stmtPosted, &stmtCfg)
        stampChildJobParent(child, parent.JobReference.JobID)

        // Bracket the execution with timestamps; the end time is taken even
        // when the statement fails, although only the success path uses it.
        startedAt := time.Now().UTC()
        schema, rows, statementType, emulatorRoute, err := executeScriptStatement(
                ctx, deps, projectID, defaultDataset, stmtSQL, useLegacy)
        endedAt := time.Now().UTC()

        if err == nil {
                finalizeDoneJob(deps, child, startedAt, endedAt,
                        schema, nil, rows, statementType, emulatorRoute, nil, nil, nil, r)
        } else {
                finalizeFailedJob(deps, child, startedAt, err)
        }
        // Re-stamp the parent link after finalization on both paths.
        stampChildJobParent(child, parent.JobReference.JobID)
}

// registerFinalSelectChildJob records the script's final result as a child job
// of parent without executing anything: the schema, rows, statement type and
// emulator route already captured in final are attached to a new child job.
//
// The child is built from shallow copies of posted and cfg (job ID cleared,
// query struct copied but its text left unchanged). The parent link is stamped
// after creation and again after finalization.
func registerFinalSelectChildJob(
        deps Dependencies,
        r *http.Request,
        projectID string,
        parent *jobs.Job,
        posted *jobs.Job,
        cfg *jobs.JobConfiguration,
        final engineScriptFinalResult,
) {
        finalPosted := *posted
        finalPosted.JobReference.JobID = ""

        finalCfg := *cfg
        queryCopy := *cfg.Query
        finalCfg.Query = &queryCopy

        child := newPendingJob(deps, projectID, &finalPosted, &finalCfg)
        stampChildJobParent(child, parent.JobReference.JobID)

        // No work happens between the two readings; start and end are simply
        // two consecutive clock samples.
        startedAt := time.Now().UTC()
        endedAt := time.Now().UTC()
        finalizeDoneJob(
                deps, child, startedAt, endedAt,
                final.schema, nil, final.rows,
                final.statementType, final.emulatorRoute,
                nil, nil, nil, r,
        )
        stampChildJobParent(child, parent.JobReference.JobID)
}

// runEngineScript executes a whole script through the engine and then
// registers its child jobs.
//
// The script text is first passed through transformScriptDeclares and the
// transformed SQL is executed as one statement. On failure the error is
// returned and no child jobs are registered. On success the original
// (untransformed) sql is handed to registerEngineScriptChildJobs together with
// the execution result, and the outcome reports the child count plus that same
// result as the script's final schema, rows, statement type and route.
func runEngineScript(
        ctx context.Context,
        deps Dependencies,
        r *http.Request,
        projectID string,
        parent *jobs.Job,
        posted *jobs.Job,
        cfg *jobs.JobConfiguration,
        defaultDataset, sql string,
        useLegacy bool,
) (*scriptExecOutcome, error) {
        schema, rows, statementType, emulatorRoute, err := executeScriptStatement(
                ctx, deps, projectID, defaultDataset, transformScriptDeclares(sql), useLegacy)
        if err != nil {
                return nil, err
        }

        final := engineScriptFinalResult{
                schema:        schema,
                rows:          rows,
                statementType: statementType,
                emulatorRoute: emulatorRoute,
        }
        outcome := &scriptExecOutcome{
                finalSchema:   schema,
                finalRows:     rows,
                finalStmtType: statementType,
                finalRoute:    emulatorRoute,
        }
        outcome.childCount = registerEngineScriptChildJobs(
                ctx, deps, r, projectID, defaultDataset, useLegacy,
                parent, posted, cfg, sql, final)
        return outcome, nil
}

package handlers

import "strings"

// scriptNeedsGoogleSQLExecutor mirrors
// backend/engine/coordinator/script_executor_internal.cc so the gateway
// preserves DECLARE syntax for scripts routed through
// googlesql::ScriptExecutor (CREATE CONSTANT lowering breaks IF/WHILE scope).
//
// Detection is purely textual and case-insensitive, applied to the output of
// sqlForScriptDetection: the script qualifies when it starts with a
// control-flow keyword, or when such a keyword appears anywhere preceded by a
// space or newline. FOR only counts in the "anywhere" scan when " IN " is
// also present.
func scriptNeedsGoogleSQLExecutor(sql string) bool {
        detectable := sqlForScriptDetection(sql)

        // Keywords that mark a control-flow script when they open the text.
        leading := strings.ToUpper(strings.TrimSpace(detectable))
        for _, prefix := range []string{
                "IF ", "WHILE ", "LOOP ", "REPEAT", "FOR ", "RAISE ",
                "EXECUTE IMMEDIATE", "EXCEPTION",
        } {
                if strings.HasPrefix(leading, prefix) {
                        return true
                }
        }

        // Keywords that mark a control-flow script wherever they occur.
        upper := strings.ToUpper(detectable)
        for _, marker := range []string{
                " IF ", "\nIF ",
                " WHILE ", "\nWHILE ",
                " LOOP ", "\nLOOP ",
                " REPEAT", "\nREPEAT",
                "EXCEPTION",
                " RAISE ", "\nRAISE ",
                "EXECUTE IMMEDIATE",
        } {
                if strings.Contains(upper, marker) {
                        return true
                }
        }

        // FOR is ambiguous on its own, so it must be paired with " IN ".
        if !strings.Contains(upper, " IN ") {
                return false
        }
        return strings.Contains(upper, " FOR ") || strings.Contains(upper, "\nFOR ")
}

package handlers

import (
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/session"
)

// sessionStore returns the session store held by deps, creating one with
// NewSessionStore and saving it on deps the first time it is requested.
func sessionStore(deps *Dependencies) *session.Store {
        if deps.Sessions != nil {
                return deps.Sessions
        }
        deps.Sessions = NewSessionStore()
        return deps.Sessions
}

// queryJobConnectionProperties returns the connection properties of the query
// configuration, or nil when cfg or its Query section is absent.
func queryJobConnectionProperties(cfg *jobs.JobConfiguration) []bqtypes.ConnectionProperty {
        if cfg != nil && cfg.Query != nil {
                return cfg.Query.ConnectionProperties
        }
        return nil
}

// queryJobCreateSession reports whether the query configuration has
// CreateSession set. A nil cfg or a nil Query section yields false.
func queryJobCreateSession(cfg *jobs.JobConfiguration) bool {
        if cfg == nil || cfg.Query == nil {
                return false
        }
        return cfg.Query.CreateSession
}

// stampJobSessionInfo stores info in the job's statistics. It does nothing
// when either job or info is nil.
func stampJobSessionInfo(job *jobs.Job, info *bqtypes.SessionInfo) {
        if job != nil && info != nil {
                job.Statistics.SessionInfo = info
        }
}

package handlers

import (
        "context"
        "crypto/sha256"
        "encoding/base64"
        "encoding/hex"
        "encoding/json"
        "fmt"
        "io"
        "net/http"
        "net/url"
        "slices"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// tableDataInsertAllKind is the `kind` field BigQuery uses on the
// success response of tabledata.insertAll. See
// docs/bigquery/docs/reference/rest/v2/tabledata/insertAll.md.
const tableDataInsertAllKind = "bigquery#tableDataInsertAllResponse"

// tableDataListKind is the `kind` field for a tabledata.list response.
// See docs/bigquery/docs/reference/rest/v2/tabledata/list.md.
const tableDataListKind = "bigquery#tableDataList"

// tableDataListDefaultMaxResults bounds the page size when the
// caller does not specify `maxResults`. Matches what most BigQuery
// client libraries pick on their own (the public API itself does not
// document a server-side default).
const tableDataListDefaultMaxResults = 10000

// tableDataListMaxResultsCap is the upper bound honored for maxResults.
// Larger requested values are clamped down to this cap rather than
// rejected (see tableDataListPaging).
const tableDataListMaxResultsCap = 100000

// decodeInsertAllBody parses the JSON body of tabledata.insertAll
// into the wire-shape struct.
//
// The second result reports whether the caller may continue. When it
// is false a 400 "invalid" error has already been written to w, either
// because the body could not be read or because it is not valid JSON
// for the request shape.
//
// An empty body is NOT rejected: it yields the zero-value request and
// ok=true, so the request is treated as carrying no rows.
// NOTE(review): the upstream spec marks rows[] as required; confirm
// that accepting an empty body is the intended emulator behavior.
func decodeInsertAllBody(w http.ResponseWriter, r *http.Request) (bqtypes.TableDataInsertAllRequest, bool) {
        var req bqtypes.TableDataInsertAllRequest
        body, err := io.ReadAll(r.Body)
        if err != nil {
                writeError(w, http.StatusBadRequest, "invalid",
                        "Could not read tabledata.insertAll request body: "+err.Error())
                return req, false
        }
        // Zero-length body: skip JSON decoding and hand back the empty request.
        if len(body) == 0 {
                return req, true
        }
        if err := json.Unmarshal(body, &req); err != nil {
                writeError(w, http.StatusBadRequest, "invalid",
                        "Could not parse tabledata.insertAll request body as JSON: "+err.Error())
                return req, false
        }
        return req, true
}

// jsonCellForField converts a JSON-decoded value into a proto Cell,
// using the field schema f to shape REPEATED and STRUCT/RECORD values:
//
//   - f == nil: no schema guidance; defers to jsonToCell.
//   - REPEATED mode with a []any value: an Array whose elements are
//     converted against the element schema (see
//     jsonRepeatedElementSchema).
//   - STRUCT/RECORD type with a map[string]any value: a Struct with
//     one cell per declared sub-field, in schema order. Sub-fields
//     missing from the map become NULL cells; map keys that are not
//     declared sub-fields are ignored.
//   - anything else, including a value whose Go type does not match
//     what the schema calls for: defers to jsonToCell.
//
// The REPEATED check runs before the STRUCT check, so a repeated
// record is handled as an array whose elements are then treated as
// structs.
//
// jsonToCell, the schema-less fallback, converts a JSON-decoded value
// into a proto Cell using BigQuery's REST f/v wire shape conventions:
//
//   - nil          -> Cell.null_value = true
//   - bool         -> "true"/"false" string
//   - json.Number  -> decimal string verbatim
//   - float64/int  -> formatted decimal string (BigQuery's REST
//     surface stringifies numerics, including INT64, NUMERIC,
//     BIGNUMERIC; only FLOAT64 stays a JSON number on the wire,
//     but the engine still stores it as a string)
//   - string       -> string verbatim
//   - []byte       -> base64 encoded string (BYTES wire shape)
//   - []interface{}-> Array of converted cells
//   - map[string]any -> Struct with one cell per map value (keys are
//     not recorded); used when no schema is available
//
// The conversion is intentionally lossy: a `Cell.string_value` is
// enough to round-trip through Storage::Value::String on the engine
// side because the catalog/storage path only requires the bytes to
// come back out shape-preserved. Typing tightens later via the
// resolved AST.
func jsonCellForField(f *enginepb.FieldSchema, v any) *enginepb.Cell {
        if f == nil {
                return jsonToCell(v)
        }
        if isJSONRepeatedFieldMode(f.GetMode()) {
                arr, ok := v.([]any)
                if !ok {
                        // Schema says REPEATED but the value is not a JSON array.
                        return jsonToCell(v)
                }
                elemSchema := jsonRepeatedElementSchema(f)
                out := &enginepb.Array{Elements: make([]*enginepb.Cell, 0, len(arr))}
                for _, el := range arr {
                        out.Elements = append(out.Elements, jsonCellForField(elemSchema, el))
                }
                return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: out}}
        }
        if isJSONStructFieldType(f.GetType()) {
                m, ok := v.(map[string]any)
                if !ok {
                        // Schema says STRUCT/RECORD but the value is not a JSON object.
                        return jsonToCell(v)
                }
                st := &enginepb.Struct{Fields: make([]*enginepb.Cell, 0, len(f.GetFields()))}
                for _, sub := range f.GetFields() {
                        subV, ok := m[sub.GetName()]
                        if !ok {
                                // Keep positions aligned with the schema: absent -> NULL.
                                st.Fields = append(st.Fields, &enginepb.Cell{
                                        Value: &enginepb.Cell_NullValue{NullValue: true},
                                })
                                continue
                        }
                        st.Fields = append(st.Fields, jsonCellForField(sub, subV))
                }
                return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: st}}
        }
        return jsonToCell(v)
}

// isJSONRepeatedFieldMode reports whether mode, ignoring surrounding
// whitespace and letter case, equals sqlModeRepeated.
func isJSONRepeatedFieldMode(mode string) bool {
        trimmed := strings.TrimSpace(mode)
        return strings.EqualFold(trimmed, sqlModeRepeated)
}

// jsonRepeatedElementSchema derives the schema for a single element of a
// repeated field: a new FieldSchema carrying f's name, type, description and
// sub-fields, with the mode left unset. A nil f yields nil.
func jsonRepeatedElementSchema(f *enginepb.FieldSchema) *enginepb.FieldSchema {
        if f != nil {
                elem := new(enginepb.FieldSchema)
                elem.Name = f.GetName()
                elem.Type = f.GetType()
                elem.Description = f.GetDescription()
                elem.Fields = f.GetFields()
                return elem
        }
        return nil
}

// isJSONStructFieldType reports whether t, after trimming whitespace and
// upper-casing, is one of the struct type names (sqlTypeSTRUCT or
// sqlTypeRECORD).
func isJSONStructFieldType(t string) bool {
        normalized := strings.ToUpper(strings.TrimSpace(t))
        return normalized == sqlTypeSTRUCT || normalized == sqlTypeRECORD
}

// jsonToCell converts a JSON-decoded value into a proto Cell without any
// schema guidance:
//
//   - nil            -> NULL cell
//   - bool           -> "true" / "false" string cell
//   - json.Number    -> its decimal text, verbatim
//   - float64        -> integer text when the value is integral and fits in
//     int64 (1.0 -> "1"), otherwise the shortest round-tripping 'g' format
//   - int / int64    -> decimal text
//   - string         -> verbatim
//   - []byte         -> standard base64 text
//   - []any          -> Array of recursively converted cells
//   - map[string]any -> Struct of recursively converted values, ordered by
//     ascending key so the layout is deterministic (keys are not recorded)
//   - anything else  -> fmt "%v" text
func jsonToCell(v any) *enginepb.Cell {
        if v == nil {
                return &enginepb.Cell{Value: &enginepb.Cell_NullValue{NullValue: true}}
        }
        switch val := v.(type) {
        case bool:
                if val {
                        return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: "true"}}
                }
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: "false"}}
        case json.Number:
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: string(val)}}
        case float64:
                // json.Decode produces float64 for any unmarshaled number when
                // Decoder.UseNumber isn't set. Keep integer-valued floats as bare
                // integers (1.0 -> "1") and preserve precision for genuine
                // fractions. The range guard matters: converting a float64 that
                // int64 cannot represent is implementation-dependent in Go, so
                // without it values at or beyond ±2^63 could format differently
                // across platforms.
                if val >= -(1<<63) && val < (1<<63) && val == float64(int64(val)) {
                        return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
                                StringValue: strconv.FormatInt(int64(val), 10),
                        }}
                }
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
                        StringValue: strconv.FormatFloat(val, 'g', -1, 64),
                }}
        case int:
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
                        StringValue: strconv.Itoa(val),
                }}
        case int64:
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
                        StringValue: strconv.FormatInt(val, 10),
                }}
        case string:
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: val}}
        case []byte:
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
                        StringValue: base64.StdEncoding.EncodeToString(val),
                }}
        case []any:
                arr := &enginepb.Array{Elements: make([]*enginepb.Cell, 0, len(val))}
                for _, el := range val {
                        arr.Elements = append(arr.Elements, jsonToCell(el))
                }
                return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: arr}}
        case map[string]any:
                // Go randomizes map iteration order, so ranging over val directly
                // would lay the same JSON object out differently from call to
                // call. Visit the keys in sorted order to make the struct
                // layout reproducible.
                keys := make([]string, 0, len(val))
                for k := range val {
                        keys = append(keys, k)
                }
                slices.Sort(keys)
                st := &enginepb.Struct{Fields: make([]*enginepb.Cell, 0, len(keys))}
                for _, k := range keys {
                        st.Fields = append(st.Fields, jsonToCell(val[k]))
                }
                return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: st}}
        default:
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{
                        StringValue: fmt.Sprintf("%v", val),
                }}
        }
}

// jsonRowToProto turns one insertAll JSON row into a proto DataRow whose
// cells follow the column order of the table's gRPC schema. A column
// absent from the row is filled with a NULL cell, so the number of cells
// always equals the number of schema columns that Storage::AppendRows
// expects. Row keys that name no schema column are dropped (BigQuery's
// ignoreUnknownValues=false is approximated here by always ignoring;
// stricter semantics land alongside row-level validation in the
// query-execution work).
func jsonRowToProto(schema *enginepb.TableSchema, row map[string]any) *enginepb.DataRow {
        columns := schema.GetFields()
        cells := make([]*enginepb.Cell, 0, len(columns))
        for _, column := range columns {
                var cell *enginepb.Cell
                if value, present := row[column.GetName()]; present {
                        cell = jsonCellForField(column, value)
                } else {
                        cell = &enginepb.Cell{
                                Value: &enginepb.Cell_NullValue{NullValue: true},
                        }
                }
                cells = append(cells, cell)
        }
        return &enginepb.DataRow{Cells: cells}
}

// TableDataInsertAll implements `bigquery.tabledata.insertAll`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/insertAll
//
// Steps: validate the path and dependencies, decode the JSON body,
// fetch the destination table's schema via Catalog.DescribeTable (this
// fixes the column order), convert every row's `json` map into a proto
// DataRow, and send the whole batch to Catalog.InsertRows in a single
// call. When the request carries no rows, InsertRows is skipped. On
// success the handler replies 200 with the standard
// `bigquery#tableDataInsertAllResponse` envelope; gRPC failures from
// either catalog call are translated by grpcToHTTPError.
//
// See docs/bigquery/docs/reference/rest/v2/tabledata/insertAll.md for
// the full request/response shapes the emulator targets.
func TableDataInsertAll(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, tableID := tableIDFromPath(r)
                switch {
                case tableID == "":
                        writeError(w, http.StatusBadRequest, "invalid",
                                "tableId is required")
                        return
                case deps.Catalog == nil:
                        NotImplemented(w, r)
                        return
                }

                payload, decoded := decodeInsertAllBody(w, r)
                if !decoded {
                        return
                }

                // One reference serves both catalog calls.
                target := &enginepb.TableRef{
                        ProjectId: projectID,
                        DatasetId: datasetID,
                        TableId:   tableID,
                }
                described, err := deps.Catalog.DescribeTable(
                        r.Context(), &enginepb.DescribeTableRequest{Table: target})
                if grpcToHTTPError(w, err) {
                        return
                }

                schema := described.GetSchema()
                batch := make([]*enginepb.DataRow, len(payload.Rows))
                for i, entry := range payload.Rows {
                        batch[i] = jsonRowToProto(schema, entry.JSON)
                }

                if len(batch) != 0 {
                        _, err = deps.Catalog.InsertRows(r.Context(), &enginepb.InsertRowsRequest{
                                Table: target,
                                Rows:  batch,
                        })
                        if grpcToHTTPError(w, err) {
                                return
                        }
                }

                writeJSON(w, http.StatusOK, bqtypes.TableDataInsertAllResponse{
                        Kind: tableDataInsertAllKind,
                })
        }
}

// tableDataListParams holds parsed tabledata.list query parameters.
type tableDataListParams struct {
        // startIndex is the row offset forwarded to the engine's ListRows.
        startIndex int64
        // maxResults is the page size forwarded to ListRows; zero makes
        // buildTableDataList return only totals, without rows.
        maxResults int64
        // selectedFields lists the top-level column names to project; empty
        // means every column is returned.
        selectedFields []string
        // useInt64Timestamp carries formatOptions.useInt64Timestamp through to
        // the wire-format options used when encoding rows.
        useInt64Timestamp bool
}

// TableDataList implements `bigquery.tabledata.list`:
//
//        GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}/data
//
// Paging follows the documented `startIndex`, `maxResults`, and
// `pageToken` query parameters. A supplied pageToken is a decimal
// string holding the next start row index (the `next_start_index`
// reported by the engine's ListRows) and takes precedence over
// startIndex. `selectedFields` projects top-level columns (dotted paths
// select the top-level STRUCT field). `formatOptions.useInt64Timestamp`
// controls TIMESTAMP JSON encoding. Logical views have no Parquet
// backing; tabledata.list returns empty rows — use jobs.query for view
// preview.
//
// See docs/bigquery/docs/reference/rest/v2/tabledata/list.md.
func TableDataList(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, tableID := tableIDFromPath(r)
                switch {
                case tableID == "":
                        writeError(w, http.StatusBadRequest, "invalid",
                                "tableId is required")
                        return
                case deps.Catalog == nil:
                        NotImplemented(w, r)
                        return
                }

                query := r.URL.Query()
                startIndex, maxResults, pagingOK := tableDataListPaging(w, query)
                if !pagingOK {
                        return
                }
                params, paramsOK := parseTableDataListParams(query, startIndex, maxResults)
                if !paramsOK {
                        return
                }

                listing, err := buildTableDataList(
                        r.Context(), deps, projectID, datasetID, tableID, params)
                if grpcToHTTPError(w, err) {
                        return
                }
                writeJSON(w, http.StatusOK, listing)
        }
}

// parseTableDataListParams assembles the tabledata.list parameters from the
// already-validated paging values plus the remaining query parameters.
//
// selectedFields is a comma-separated list; each entry is trimmed, blank
// entries are skipped, and a dotted path is cut down to the part before its
// first ".". formatOptions.useInt64Timestamp is true for the
// case-insensitive values "1", "true", "t" and "yes". The boolean result is
// always true.
func parseTableDataListParams(
        q url.Values,
        startIndex, maxResults int64,
) (tableDataListParams, bool) {
        params := tableDataListParams{
                startIndex: startIndex,
                maxResults: maxResults,
        }

        if fields := strings.TrimSpace(q.Get("selectedFields")); fields != "" {
                for _, entry := range strings.Split(fields, ",") {
                        trimmed := strings.TrimSpace(entry)
                        if trimmed == "" {
                                continue
                        }
                        // BigQuery paths like "e.d.f" select into nested fields; the
                        // gateway projects at top-level granularity (field before ".").
                        topLevel, _, _ := strings.Cut(trimmed, ".")
                        params.selectedFields = append(params.selectedFields, topLevel)
                }
        }

        flag := strings.ToLower(strings.TrimSpace(q.Get("formatOptions.useInt64Timestamp")))
        params.useInt64Timestamp = flag == "1" || flag == "true" || flag == "t" || flag == "yes"
        return params, true
}

// tableDataListPaging reads the paging query parameters of tabledata.list.
//
// startIndex defaults to 0 and is overridden by pageToken when that is
// non-empty. maxResults defaults to tableDataListDefaultMaxResults and is
// clamped to tableDataListMaxResultsCap. If any of the three values is
// malformed, parsePositiveInt64 has already written the 400 response and
// the function returns ok=false with zeroed values.
func tableDataListPaging(w http.ResponseWriter, q url.Values) (startIndex, maxResults int64, ok bool) {
        start, valid := parsePositiveInt64(w, q.Get("startIndex"), "startIndex", 0)
        if !valid {
                return 0, 0, false
        }

        // startIndex is validated even when a page token will replace it.
        if token := q.Get("pageToken"); token != "" {
                if start, valid = parsePositiveInt64(w, token, "pageToken", 0); !valid {
                        return 0, 0, false
                }
        }

        limit, valid := parsePositiveInt64(
                w, q.Get("maxResults"), "maxResults", tableDataListDefaultMaxResults)
        if !valid {
                return 0, 0, false
        }
        return start, min(limit, tableDataListMaxResultsCap), true
}

// buildTableDataList produces the tabledata.list response body for one table.
//
// The table's schema is fetched first via Catalog.DescribeTable. With
// maxResults == 0 the response carries only kind, etag and totalRows (the
// total comes from tableDataListTotalRows). Otherwise one page is fetched
// with Catalog.ListRows, each row is encoded against the schema with the
// requested timestamp format, and the selected-field projection is applied.
// A page token is set while the engine's next start index is still below
// the total row count. Errors from the catalog are returned unchanged.
func buildTableDataList(
        ctx context.Context,
        deps Dependencies,
        projectID, datasetID, tableID string,
        params tableDataListParams,
) (bqtypes.TableDataList, error) {
        ref := &enginepb.TableRef{
                ProjectId: projectID,
                DatasetId: datasetID,
                TableId:   tableID,
        }
        described, err := deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref})
        if err != nil {
                return bqtypes.TableDataList{}, err
        }
        schema := described.GetSchema()
        wireOpts := bqtypes.WireFormatOptions{UseInt64Timestamp: params.useInt64Timestamp}

        // Totals-only request: no rows and no page token.
        if params.maxResults == 0 {
                rowCount, countErr := tableDataListTotalRows(ctx, deps.Catalog, ref)
                if countErr != nil {
                        return bqtypes.TableDataList{}, countErr
                }
                return bqtypes.TableDataList{
                        Kind:      tableDataListKind,
                        Etag:      tableDataListEtag(schema, rowCount),
                        TotalRows: strconv.FormatInt(rowCount, 10),
                }, nil
        }

        page, err := deps.Catalog.ListRows(ctx, &enginepb.ListRowsRequest{
                Table:      ref,
                StartIndex: params.startIndex,
                MaxResults: params.maxResults,
        })
        if err != nil {
                return bqtypes.TableDataList{}, err
        }

        total := page.GetTotalRows()
        listing := bqtypes.TableDataList{
                Kind:      tableDataListKind,
                Etag:      tableDataListEtag(schema, total),
                TotalRows: strconv.FormatInt(total, 10),
        }
        if next := page.GetNextStartIndex(); next < total && params.maxResults > 0 {
                listing.PageToken = strconv.FormatInt(next, 10)
        }

        projection := selectedFieldIndices(schema, params.selectedFields)
        encoded := page.GetRows()
        listing.Rows = make([]bqtypes.Row, 0, len(encoded))
        for _, r := range encoded {
                whole := bqtypes.CellsToRowForSchema(r.GetCells(), schema, wireOpts)
                listing.Rows = append(listing.Rows, projectRowFields(whole, projection))
        }
        return listing, nil
}

// tableDataListTotalRows asks the catalog for the table's total row count by
// issuing a ListRows call with StartIndex 0 and MaxResults 0 and returning
// the total reported in the response.
func tableDataListTotalRows(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        table *enginepb.TableRef,
) (int64, error) {
        probe := &enginepb.ListRowsRequest{
                Table:      table,
                StartIndex: 0,
                MaxResults: 0,
        }
        page, err := catalog.ListRows(ctx, probe)
        if err != nil {
                return 0, err
        }
        return page.GetTotalRows(), nil
}

// tableDataListEtag derives the etag for a tabledata.list response: the first
// 32 hex characters of a SHA-256 over every top-level field's name, type and
// mode (each followed by a zero byte) and then the decimal total row count.
func tableDataListEtag(schema *enginepb.TableSchema, totalRows int64) string {
        digest := sha256.New()
        for _, field := range schema.GetFields() {
                for _, part := range [...]string{field.GetName(), field.GetType(), field.GetMode()} {
                        // The trailing zero byte keeps adjacent parts from running together.
                        _, _ = digest.Write(append([]byte(part), 0))
                }
        }
        _, _ = digest.Write(strconv.AppendInt(nil, totalRows, 10))
        full := hex.EncodeToString(digest.Sum(nil))
        return full[:32]
}

// selectedFieldIndices maps the requested column names onto their positions
// in schema, keeping the order in which they were requested. Names that are
// not in the schema and repeated selections of the same column are skipped.
// The result is nil when there is no schema, nothing was selected, or no
// selected name matched.
func selectedFieldIndices(schema *enginepb.TableSchema, selected []string) []int {
        if schema == nil || len(selected) == 0 {
                return nil
        }

        position := map[string]int{}
        for i, field := range schema.GetFields() {
                position[field.GetName()] = i
        }

        taken := map[int]bool{}
        indices := make([]int, 0, len(selected))
        for _, name := range selected {
                i, known := position[name]
                if !known || taken[i] {
                        continue
                }
                taken[i] = true
                indices = append(indices, i)
        }

        if len(indices) > 0 {
                return indices
        }
        return nil
}

// projectRowFields returns a row holding only the cells at the given
// indices, in that order. An empty index list returns the row unchanged;
// indices outside the row's cell range are ignored.
func projectRowFields(row bqtypes.Row, fieldIdx []int) bqtypes.Row {
        if len(fieldIdx) == 0 {
                return row
        }
        picked := make([]bqtypes.Cell, 0, len(fieldIdx))
        for _, i := range fieldIdx {
                if i < 0 || i >= len(row.F) {
                        continue
                }
                picked = append(picked, row.F[i])
        }
        return bqtypes.Row{F: picked}
}

// parsePositiveInt64 parses a non-negative base-10 integer from a query
// parameter (zero is accepted despite the name). Empty input falls back to
// `defaultValue`. A value that fails to parse as an int64 or is negative
// writes a 400 "invalid" envelope naming the parameter and returns
// ok=false so the caller can short-circuit.
func parsePositiveInt64(w http.ResponseWriter, raw, name string, defaultValue int64) (int64, bool) {
        if raw == "" {
                return defaultValue, true
        }
        if parsed, err := strconv.ParseInt(raw, 10, 64); err == nil && parsed >= 0 {
                return parsed, true
        }
        writeError(w, http.StatusBadRequest, "invalid",
                fmt.Sprintf("Query parameter %q must be a non-negative integer", name))
        return 0, false
}

package handlers

import (
        "encoding/json"
        "errors"
        "fmt"
        "net/http"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// tableKind is the value the BigQuery REST API returns for the `kind`
// field of a Table resource. See
// docs/bigquery/docs/reference/rest/v2/tables/get.md.
const tableKind = "bigquery#table"

// tableListKind is the `kind` field for a TableList response, as
// emitted by the TableList handler.
const tableListKind = "bigquery#tableList"

// defaultTableType is the value of the Table.type field for the
// non-view, non-external tables the emulator's Catalog tracks today.
// tableResource applies it whenever a table's type is left empty.
const defaultTableType = "TABLE"

// viewTableType is the BigQuery REST type string for views created
// via tables.insert with a view definition.
const viewTableType = "VIEW"

// materializedViewTableType is the BigQuery REST type string for
// materialized views created via tables.insert with a materializedView
// definition (see QueryMaterializedViewIT).
const materializedViewTableType = "MATERIALIZED_VIEW"

// externalTableType is the BigQuery REST type string for GCS-backed
// external tables (tables.insert with externalDataConfiguration).
const externalTableType = "EXTERNAL"

// snapshotTableType is the BigQuery REST type string for table snapshots
// created via configuration.copy jobs with operationType=SNAPSHOT.
const snapshotTableType = "SNAPSHOT"

// tableIDFromPath extracts the {projectId}/{datasetId}/{tableId} triple
// captured by the route pattern. The tableId is passed through
// splitColonOp so that an AIP-136 custom-method suffix (e.g.
// ":getIamPolicy") is stripped, which lets TableCustomMethodPOST reuse
// this helper.
func tableIDFromPath(r *http.Request) (projectID, datasetID, tableID string) {
        bareTableID, _ := splitColonOp(r.PathValue("tableId"))
        return r.PathValue("projectId"), r.PathValue("datasetId"), bareTableID
}

// tableResource completes a Table resource for a successful response.
// Caller-supplied fields the engine does not need to know about
// (Schema, FriendlyName, Description, ...) are left untouched, while
// the bookkeeping fields the REST client expects are stamped: Kind, ID
// and TableReference are always overwritten; Type, CreationTime,
// LastModifiedTime, Labels and Location are filled in only when empty
// (Location falls back to "US", LastModifiedTime to CreationTime).
// applyTableStorageStats runs last on the completed resource.
//
// Labels is materialized to an empty map when nil so the upstream
// `getTableLabels` sample's `Object.entries(table.metadata.labels)`
// call returns an empty iterator instead of erroring with
// `TypeError: Cannot convert undefined or null to object`. The
// bqtypes.Table.Labels tag omits `omitempty` so the empty map
// round-trips as `"labels":{}` on the wire. Mirrors datasetResource.
func tableResource(projectID, datasetID, tableID string, t bqtypes.Table) bqtypes.Table {
        // Identity fields: always derived from the path components.
        t.Kind = tableKind
        t.ID = fmt.Sprintf("%s:%s.%s", projectID, datasetID, tableID)
        t.TableReference = bqtypes.TableReference{
                ProjectID: projectID,
                DatasetID: datasetID,
                TableID:   tableID,
        }

        // Defaults: applied only where the caller left a field empty.
        if t.Location == "" {
                t.Location = "US"
        }
        if t.Labels == nil {
                t.Labels = bqtypes.ResourceLabels{}
        }
        if t.Type == "" {
                t.Type = defaultTableType
        }
        // CreationTime must be settled before LastModifiedTime inherits it.
        if t.CreationTime == "" {
                t.CreationTime = nowMillis()
        }
        if t.LastModifiedTime == "" {
                t.LastModifiedTime = t.CreationTime
        }

        applyTableStorageStats(&t)
        return t
}

// TableList implements `bigquery.tables.list`:
//
//        GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables
//
// Calls the Catalog.ListTables RPC and folds the (deterministically
// ordered, ascending table_id) result into a BigQuery tableList
// envelope. Mirrors DatasetList's pagination posture: no
// `nextPageToken` today, every entry in one page. Without a Catalog
// the handler answers 200 with an empty list and totalItems 0.
//
// Per-entry shape matches upstream's tableList item: kind, id
// (projectId:datasetId.tableId), tableReference, type (defaulting to
// "TABLE"), and an empty labels object so node samples that call
// `Object.entries(item.metadata.labels)` on each iteration item do
// not raise.
func TableList(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                dataset := &enginepb.DatasetRef{
                        ProjectId: r.PathValue("projectId"),
                        DatasetId: r.PathValue("datasetId"),
                }

                if deps.Catalog == nil {
                        emptyListing := map[string]any{
                                resourceKeyKind:       tableListKind,
                                resourceKeyTables:     []bqtypes.Table{},
                                resourceKeyTotalItems: 0,
                        }
                        writeJSON(w, http.StatusOK, emptyListing)
                        return
                }

                listed, err := deps.Catalog.ListTables(
                        r.Context(), &enginepb.ListTablesRequest{Dataset: dataset})
                if grpcToHTTPError(w, err) {
                        return
                }

                refs := listed.GetTables()
                entries := make([]map[string]any, 0, len(refs))
                for _, ref := range refs {
                        entries = append(entries, tableListItem(r.Context(), deps, ref))
                }
                writeJSON(w, http.StatusOK, map[string]any{
                        resourceKeyKind:       tableListKind,
                        resourceKeyTables:     entries,
                        resourceKeyTotalItems: len(entries),
                })
        }
}

// TableInsert implements `bigquery.tables.insert`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables
//
// The request body is decoded into a Table whose
// tableReference.tableId is mandatory (400 otherwise). Without a
// Catalog the request is answered by NotImplemented. Otherwise the
// materialized-view and view schema helpers, the unsupported-posture
// check and the engine registration run in that order; each receives
// the ResponseWriter and signals failure through its return value, so
// the handler stops at the first one that fails. On success the new
// Table resource is written by writeInsertedTableResponse.
func TableInsert(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID := r.PathValue("projectId"), r.PathValue("datasetId")
                table, decoded := decodeTableBody(w, r)
                if !decoded {
                        return
                }
                tableID := table.TableReference.TableID
                switch {
                case tableID == "":
                        writeError(w, http.StatusBadRequest, "invalid",
                                "tableReference.tableId is required")
                        return
                case deps.Catalog == nil:
                        NotImplemented(w, r)
                        return
                }
                // && short-circuits, so a later step never runs once an
                // earlier one has reported failure.
                accepted := populateMaterializedViewSchema(w, deps, r, projectID, &table) &&
                        populateViewSchema(w, deps, r, projectID, &table) &&
                        !rejectUnsupportedTablePosture(w, &table) &&
                        registerInsertedTable(w, r, deps, projectID, datasetID, tableID, &table)
                if !accepted {
                        return
                }
                writeInsertedTableResponse(w, deps, r, projectID, datasetID, tableID, table)
        }
}

// populateMaterializedViewSchema prepares a REST materialized-view
// insert. It is a no-op unless the Table carries a non-empty
// materializedView.query. For such tables it defaults Type to the
// materialized-view type and, when the client supplied no schema
// fields, asks inferTableSchemaFromQuery for the query's schema and
// stores a non-nil result on the Table.
//
// Returns false only when schema inference failed, in which case an
// error response has already been written to w.
func populateMaterializedViewSchema(
        w http.ResponseWriter,
        deps Dependencies,
        r *http.Request,
        projectID string,
        t *bqtypes.Table,
) bool {
        mv := t.MaterializedView
        if mv == nil || mv.Query == "" {
                return true
        }
        if t.Type == "" {
                t.Type = materializedViewTableType
        }
        if clientSchema := t.Schema; clientSchema != nil && len(clientSchema.Fields) > 0 {
                return true
        }
        schema, err := inferTableSchemaFromQuery(deps, r, projectID, mv.Query)
        switch {
        case err == nil:
                if schema != nil {
                        t.Schema = schema
                }
                return true
        case queryGRPCToHTTPError(w, err):
                // The gRPC-aware mapper already produced the response.
                return false
        default:
                writeError(w, http.StatusInternalServerError, reasonInternalError,
                        "Could not infer materialized view schema: "+err.Error())
                return false
        }
}

// populateViewSchema prepares a REST logical-view insert. It is a
// no-op unless the Table carries a non-empty view.query. For such
// tables it defaults Type to the view type and, when the client
// supplied no schema fields, asks inferTableSchemaFromQuery for the
// query's schema and stores a non-nil result on the Table.
//
// Returns false only when schema inference failed, in which case an
// error response has already been written to w.
func populateViewSchema(
        w http.ResponseWriter,
        deps Dependencies,
        r *http.Request,
        projectID string,
        t *bqtypes.Table,
) bool {
        view := t.View
        if view == nil || view.Query == "" {
                return true
        }
        if t.Type == "" {
                t.Type = viewTableType
        }
        if schemaProvided := t.Schema != nil && len(t.Schema.Fields) > 0; schemaProvided {
                return true
        }
        schema, err := inferTableSchemaFromQuery(deps, r, projectID, view.Query)
        if err == nil {
                if schema != nil {
                        t.Schema = schema
                }
                return true
        }
        // Prefer the gRPC-aware mapping; fall back to a generic 500 when
        // it declines to handle the error.
        if !queryGRPCToHTTPError(w, err) {
                writeError(w, http.StatusInternalServerError, reasonInternalError,
                        "Could not infer view schema: "+err.Error())
        }
        return false
}

// insertLogicalView registers a REST-created logical view in the
// engine by executing a `CREATE OR REPLACE VIEW` statement through the
// Query client and draining the resulting stream — the same path
// `CREATE VIEW` DDL takes, so the view lands in the engine's view
// registry instead of being shadowed by an empty backing table.
//
// Each name component is backtick-quoted independently so names that
// are not bare identifiers (for example project IDs with hyphens)
// resolve as a three-part `project.dataset.view` path rather than a
// single dotted identifier. Backticks and backslashes inside a
// component are backslash-escaped, so an ID taken from the request
// cannot terminate the quoted identifier and splice extra SQL into
// the statement. Names without those characters produce exactly the
// same DDL text as before.
//
// Returns false after writing an HTTP error when the Query client is
// not configured, the engine returns no stream, or execution fails;
// returns true once the stream drained without error.
func insertLogicalView(
        w http.ResponseWriter,
        r *http.Request,
        deps Dependencies,
        projectID, datasetID, tableID, viewQuery string,
) bool {
        if deps.Query == nil {
                writeError(w, http.StatusNotImplemented, reasonInternalError,
                        "engine query client is not configured for view registration")
                return false
        }
        // quote wraps one path component in backticks, escaping the two
        // characters that are significant inside a quoted identifier.
        quote := func(name string) string {
                quoted := make([]byte, 0, len(name)+2)
                quoted = append(quoted, '`')
                for i := 0; i < len(name); i++ {
                        if c := name[i]; c == '`' || c == '\\' {
                                quoted = append(quoted, '\\')
                        }
                        quoted = append(quoted, name[i])
                }
                return string(append(quoted, '`'))
        }
        ddl := fmt.Sprintf("CREATE OR REPLACE VIEW %s.%s.%s AS\n%s",
                quote(projectID), quote(datasetID), quote(tableID), viewQuery)
        stream, err := deps.Query.ExecuteQuery(r.Context(), &enginepb.QueryRequest{
                ProjectId: projectID,
                Sql:       ddl,
        })
        if err == nil && stream == nil {
                err = errors.New("engine returned no result stream for view registration")
        }
        if err == nil {
                // Only the terminal error matters; the DDL yields no rows
                // the handler needs.
                _, _, _, _, _, _, err = drainSyncStream(stream)
        }
        if err != nil {
                if queryGRPCToHTTPError(w, err) {
                        return false
                }
                writeError(w, http.StatusInternalServerError, reasonInternalError,
                        "Could not register view: "+err.Error())
                return false
        }
        return true
}

// inferTableSchemaFromQuery sends a view or materialized-view
// definition query to the engine's DryRun RPC and converts the schema
// in the response to a REST TableSchema via schemaFromProto.
//
// Returns (nil, nil) when no Query client is configured or sql is
// empty, so callers can still register a schema-less table. A DryRun
// failure is returned unchanged.
func inferTableSchemaFromQuery(deps Dependencies, r *http.Request,
        projectID, sql string,
) (*bqtypes.TableSchema, error) {
        if sql == "" || deps.Query == nil {
                return nil, nil
        }
        dryRun := &enginepb.QueryRequest{ProjectId: projectID, Sql: sql}
        resp, err := deps.Query.DryRun(r.Context(), dryRun)
        if err != nil {
                return nil, err
        }
        schema := schemaFromProto(resp.GetSchema())
        return schema, nil
}

// tableFromDescribeResponse converts a Catalog.DescribeTable response
// into a REST Table: the schema (normalized for REST), the table type
// reported by the engine, and — when the response carries a view
// query — a ViewDefinition holding that query and its legacy-SQL flag.
func tableFromDescribeResponse(resp *enginepb.DescribeTableResponse) bqtypes.Table {
        var t bqtypes.Table
        t.Schema = normalizeRESTTableSchema(schemaFromProto(resp.GetSchema()))
        // An empty engine-side type leaves Type at its zero value, the
        // same as not assigning it at all.
        t.Type = resp.GetTableType()
        if q := resp.GetViewQuery(); q != "" {
                t.View = &bqtypes.ViewDefinition{
                        Query:        q,
                        UseLegacySQL: resp.GetViewUseLegacySql(),
                }
        }
        return t
}

// TableGet implements `bigquery.tables.get`:
//
//        GET /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// With a Catalog configured the table is resolved through
// Catalog.DescribeTable and rendered by catalogTable + tableResource.
// If DescribeTable fails but the REST metadata overlay holds a view or
// materialized view under the same name, that overlay entry is served
// instead; any other failure is reported through grpcToHTTPError.
// Without a Catalog an empty Table stamped by tableResource is
// returned.
func TableGet(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, tableID := tableIDFromPath(r)
                respond := func(t bqtypes.Table) {
                        writeJSON(w, http.StatusOK, tableResource(projectID, datasetID, tableID, t))
                }
                if deps.Catalog == nil {
                        respond(bqtypes.Table{})
                        return
                }
                ref := &enginepb.TableRef{
                        ProjectId: projectID,
                        DatasetId: datasetID,
                        TableId:   tableID,
                }
                resp, err := deps.Catalog.DescribeTable(r.Context(),
                        &enginepb.DescribeTableRequest{Table: ref})
                if err != nil {
                        // Views recorded at tables.insert may be unknown to
                        // DescribeTable; serve them from the overlay so an
                        // insert followed by a get keeps working.
                        overlay, found := deps.Metadata.GetTable(projectID, datasetID, tableID)
                        if found && (overlay.View != nil || overlay.MaterializedView != nil) {
                                respond(overlay)
                                return
                        }
                        grpcToHTTPError(w, err)
                        return
                }
                respond(catalogTable(r.Context(), deps, projectID, datasetID, tableID, resp))
        }
}

// TableUpdate implements `bigquery.tables.update`:
//
//        PUT /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// Full replacement of the Table metadata. The handler makes no engine
// catalog call of its own: the decoded body is stored in the
// MetadataStore (replacing any previous entry), column governance is
// synced from its schema, and the same body is echoed back after
// tableResource stamps the bookkeeping fields.
func TableUpdate(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, tableID := tableIDFromPath(r)
                replacement, decoded := decodeTableBody(w, r)
                // The posture check is only reached for a decoded body.
                if !decoded || rejectUnsupportedTablePosture(w, &replacement) {
                        return
                }
                deps.Metadata.PutTable(projectID, datasetID, tableID, replacement)
                SyncColumnGovernanceFromSchema(
                        r.Context(), deps, projectID, datasetID, tableID, replacement.Schema)
                resource := tableResource(projectID, datasetID, tableID, replacement)
                writeJSON(w, http.StatusOK, resource)
        }
}

// TablePatch implements `bigquery.tables.patch`:
//
//        PATCH /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// Sparse update. The decoded body is merged into the MetadataStore,
// any schema it carries is pushed to the engine through
// syncPatchedTableSchema (a failure there yields 400), and column
// governance is synced from the patch schema. The response is the
// merged overlay entry when no Catalog is configured, otherwise the
// table as re-described by the Catalog. When the patch touched labels
// and the resulting label set is empty, the response is flagged to
// omit the empty labels object on the wire.
func TablePatch(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, tableID := tableIDFromPath(r)
                t, ok := decodeTableBody(w, r)
                if !ok || rejectUnsupportedTablePosture(w, &t) {
                        return
                }
                // respond applies the shared label rule and writes the
                // final resource; both exits below funnel through it.
                respond := func(out bqtypes.Table) {
                        if t.LabelsPatchPresent() && len(out.Labels) == 0 {
                                out.SetOmitEmptyLabelsOnWire(true)
                        }
                        writeJSON(w, http.StatusOK, tableResource(projectID, datasetID, tableID, out))
                }

                deps.Metadata.MergeTable(projectID, datasetID, tableID, t)
                syncErr := syncPatchedTableSchema(r.Context(), deps, projectID, datasetID, tableID, t.Schema)
                if syncErr != nil {
                        writeError(w, http.StatusBadRequest, reasonInvalid, syncErr.Error())
                        return
                }
                SyncColumnGovernanceFromSchema(r.Context(), deps, projectID, datasetID, tableID, t.Schema)

                if deps.Catalog == nil {
                        // No engine to consult: answer from the overlay,
                        // falling back to the raw patch body.
                        if merged, found := deps.Metadata.GetTable(projectID, datasetID, tableID); found {
                                respond(merged)
                        } else {
                                respond(t)
                        }
                        return
                }
                desc, err := deps.Catalog.DescribeTable(r.Context(), &enginepb.DescribeTableRequest{
                        Table: &enginepb.TableRef{
                                ProjectId: projectID,
                                DatasetId: datasetID,
                                TableId:   tableID,
                        },
                })
                if err != nil {
                        grpcToHTTPError(w, err)
                        return
                }
                respond(catalogTable(r.Context(), deps, projectID, datasetID, tableID, desc))
        }
}

// TableDelete implements `bigquery.tables.delete`:
//
//        DELETE /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}
//
// Without a Catalog the request is answered by NotImplemented.
// Otherwise the table is handed to the snapshot store (when one is
// configured) before Catalog.DropTable runs; after a successful drop
// the REST metadata overlay entry is removed and an empty JSON object
// is returned.
func TableDelete(deps Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                projectID, datasetID, tableID := tableIDFromPath(r)
                if deps.Catalog == nil {
                        NotImplemented(w, r)
                        return
                }
                if snapshots := deps.Snapshots; snapshots != nil {
                        // Best effort: a failed capture does not block the delete.
                        _ = snapshots.CaptureBeforeDelete(r.Context(), deps.Catalog,
                                projectID, datasetID, tableID)
                }
                target := &enginepb.TableRef{
                        ProjectId: projectID,
                        DatasetId: datasetID,
                        TableId:   tableID,
                }
                _, err := deps.Catalog.DropTable(r.Context(), &enginepb.DropTableRequest{Table: target})
                if grpcToHTTPError(w, err) {
                        return
                }
                deps.Metadata.DeleteTable(projectID, datasetID, tableID)
                writeJSON(w, http.StatusOK, struct{}{})
        }
}

// localStubIamPolicyEtag is the deterministic etag returned by the
// emulator's metadata-only table IAM stub (no real ACL store).
const localStubIamPolicyEtag = "BwWWja0YfJA="

func localStubEmptyIamPolicy() map[string]any {
        return map[string]any{
                "version":  1,
                "bindings": []any{},
                "etag":     localStubIamPolicyEtag,
        }
}

// TableGetIamPolicy implements `bigquery.tables.getIamPolicy`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}:getIamPolicy
//
// Dispatched from TableCustomMethodPOST. The request is not inspected;
// the handler always answers 200 with the stub's empty policy.
func TableGetIamPolicy(_ Dependencies) http.HandlerFunc {
        handler := func(w http.ResponseWriter, _ *http.Request) {
                policy := localStubEmptyIamPolicy()
                writeJSON(w, http.StatusOK, policy)
        }
        return handler
}

// TableSetIamPolicy implements `bigquery.tables.setIamPolicy`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}:setIamPolicy
//
// Dispatched from TableCustomMethodPOST. Nothing is persisted: the
// handler echoes the `policy` object from the request body, filling in
// an empty bindings list and the stub etag when those keys are absent.
// A body without a policy yields the stub's empty policy.
func TableSetIamPolicy(_ Dependencies) http.HandlerFunc {
        return func(w http.ResponseWriter, r *http.Request) {
                var body struct {
                        Policy map[string]any `json:"policy"`
                }
                // The decode error is discarded on purpose: whatever was
                // (or was not) decoded is handled by the nil check below.
                _ = json.NewDecoder(r.Body).Decode(&body)
                policy := body.Policy
                if policy == nil {
                        writeJSON(w, http.StatusOK, localStubEmptyIamPolicy())
                        return
                }
                if _, hasBindings := policy["bindings"]; !hasBindings {
                        policy["bindings"] = []any{}
                }
                if _, hasEtag := policy["etag"]; !hasEtag {
                        policy["etag"] = localStubIamPolicyEtag
                }
                writeJSON(w, http.StatusOK, policy)
        }
}

// TableTestIamPermissions implements `bigquery.tables.testIamPermissions`:
//
//        POST /bigquery/v2/projects/{projectId}/datasets/{datasetId}/tables/{tableId}:testIamPermissions
//
// Dispatched from TableCustomMethodPOST. The method has no emulator
// behavior yet; every request is delegated to NotImplemented.
func TableTestIamPermissions(_ Dependencies) http.HandlerFunc {
        return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
                NotImplemented(rw, req)
        })
}

// TableCustomMethodPOST routes POST requests registered against
// `/tables/{tableId}` by the `:op` suffix parsed from the tableId path
// value. The three IAM custom methods are forwarded to their handlers
// (built once, up front). A request without any suffix gets 405, and
// an unrecognized suffix gets 404.
func TableCustomMethodPOST(deps Dependencies) http.HandlerFunc {
        iamHandlers := map[string]http.HandlerFunc{
                "getIamPolicy":       TableGetIamPolicy(deps),
                "setIamPolicy":       TableSetIamPolicy(deps),
                "testIamPermissions": TableTestIamPermissions(deps),
        }
        return func(w http.ResponseWriter, r *http.Request) {
                _, op := splitColonOp(r.PathValue("tableId"))
                if handle, known := iamHandlers[op]; known {
                        handle(w, r)
                        return
                }
                if op == "" {
                        writeError(w, http.StatusMethodNotAllowed, "invalid",
                                "POST is not allowed on a table resource. "+
                                        "Use POST /tables to create, /insertAll to stream rows, "+
                                        "or a documented :op IAM custom method.")
                        return
                }
                writeError(w, http.StatusNotFound, "notFound",
                        "Unknown table custom method ':"+op+"'.")
        }
}

package handlers

import (
        "context"
        "strconv"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/load"
)

// tableListItem builds one tables.list entry for a table reference
// returned by Catalog.ListTables. Labels and type come from the REST
// metadata overlay when it has them; otherwise labels default to an
// empty set and the type falls back to the reference's own type and
// then to the default table type. View / materialized-view query text
// is taken from the overlay first and, if neither is present there,
// looked up through mergeListViewQueryFromCatalog.
func tableListItem(ctx context.Context, deps Dependencies, ref *enginepb.TableRef) map[string]any {
        projectID, datasetID, tableID := ref.GetProjectId(), ref.GetDatasetId(), ref.GetTableId()
        overlay, hasOverlay := deps.Metadata.GetTable(projectID, datasetID, tableID)

        labels := bqtypes.ResourceLabels{}
        if hasOverlay && overlay.Labels != nil {
                labels = overlay.Labels
        }

        tableType := defaultTableType
        switch {
        case hasOverlay && overlay.Type != "":
                tableType = overlay.Type
        case ref.GetTableType() != "":
                tableType = ref.GetTableType()
        }

        item := map[string]any{
                "kind": tableKind,
                "id":   projectID + ":" + datasetID + "." + tableID,
                "tableReference": bqtypes.TableReference{
                        ProjectID: projectID,
                        DatasetID: datasetID,
                        TableID:   tableID,
                },
                "type":   tableType,
                "labels": labels,
        }
        if hasOverlay {
                mergeListViewQueryFromOverlay(overlay, item)
        }
        // Only consult the catalog when the overlay contributed neither
        // kind of query text.
        _, hasView := item["view"]
        _, hasMV := item["materializedView"]
        if !hasView && !hasMV {
                mergeListViewQueryFromCatalog(ctx, deps, ref, tableType, item)
        }
        return item
}

// mergeListViewQueryFromOverlay copies non-empty view and
// materialized-view query text from the metadata overlay entry into
// the list item under the "view" and "materializedView" keys. Entries
// without query text leave the item untouched.
func mergeListViewQueryFromOverlay(overlay bqtypes.Table, item map[string]any) {
        if v := overlay.View; v != nil && v.Query != "" {
                item["view"] = map[string]any{discoveryMethodQuery: v.Query}
        }
        if mv := overlay.MaterializedView; mv != nil && mv.Query != "" {
                item["materializedView"] = map[string]any{discoveryMethodQuery: mv.Query}
        }
}

// catalogTable assembles the REST Table for a successful DescribeTable
// response. Starting from the engine's view of the table it layers on,
// in order: the dataset overlay's location, the table's metadata
// overlay, default collation on the schema, the creation time known
// to the snapshot store (only when none is set yet), the row count
// from Catalog.ListRows, and finally the stubbed storage counters.
func catalogTable(
        ctx context.Context,
        deps Dependencies,
        projectID, datasetID, tableID string,
        resp *enginepb.DescribeTableResponse,
) bqtypes.Table {
        table := tableFromDescribeResponse(resp)

        if ds, found := deps.Metadata.GetDataset(projectID, datasetID); found && ds.Location != "" {
                table.Location = ds.Location
        }
        if stored, found := deps.Metadata.GetTable(projectID, datasetID, tableID); found {
                table = applyTableMetadataOverlay(table, stored)
        }
        if collation := table.DefaultCollation; collation != "" {
                table.Schema = bqtypes.ApplyDefaultCollationToStringFields(table.Schema, collation)
        }
        if deps.Snapshots != nil {
                createdMs, recorded := deps.Snapshots.CreationTimeMs(projectID, datasetID, tableID)
                if recorded && table.CreationTime == "" {
                        table.CreationTime = strconv.FormatInt(createdMs, 10)
                }
        }
        if deps.Catalog != nil {
                // Zero start index and zero max results: only the total
                // row count of the response is used.
                rows, err := deps.Catalog.ListRows(ctx, &enginepb.ListRowsRequest{
                        Table: &enginepb.TableRef{
                                ProjectId: projectID,
                                DatasetId: datasetID,
                                TableId:   tableID,
                        },
                        StartIndex: 0,
                        MaxResults: 0,
                })
                switch {
                case err == nil:
                        table.NumRows = strconv.FormatInt(rows.GetTotalRows(), 10)
                case table.NumRows == "":
                        // Listing failed and nothing else supplied a count.
                        table.NumRows = "0"
                }
        }
        applyTableStorageStats(&table)
        return table
}

// mergeListViewQueryFromCatalog fills in view.query (or
// materializedView.query) on a list item from Catalog.DescribeTable.
// It only acts when a Catalog is configured and either the resolved
// table type or the reference's own type marks the entry as a view or
// materialized view. A DescribeTable error or an empty query leaves
// the item unchanged. When both classifications match, the
// materialized-view key wins.
func mergeListViewQueryFromCatalog(
        ctx context.Context,
        deps Dependencies,
        ref *enginepb.TableRef,
        tableType string,
        item map[string]any,
) {
        if deps.Catalog == nil {
                return
        }
        refType := ref.GetTableType()
        isMV := tableType == materializedViewTableType || refType == materializedViewTableType
        isView := tableType == viewTableType || refType == viewTableType
        if !(isView || isMV) {
                return
        }
        target := &enginepb.TableRef{
                ProjectId: ref.GetProjectId(),
                DatasetId: ref.GetDatasetId(),
                TableId:   ref.GetTableId(),
        }
        desc, err := deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: target})
        if err != nil {
                return
        }
        query := desc.GetViewQuery()
        if query == "" {
                return
        }
        key := "view"
        if isMV {
                key = "materializedView"
        }
        item[key] = map[string]any{discoveryMethodQuery: query}
}

// applyTableStorageStats replaces every unset byte counter on the
// Table with the string "0", so responses carry explicit zeros.
// Counters that already hold a value are left alone. NumRows is not
// touched here.
func applyTableStorageStats(t *bqtypes.Table) {
        counters := []*string{
                &t.NumBytes,
                &t.NumLongTermBytes,
                &t.NumActiveLogicalBytes,
                &t.NumTotalLogicalBytes,
                &t.NumCurrentPhysicalBytes,
                &t.NumPhysicalBytes,
                &t.NumActivePhysicalBytes,
                &t.NumLongTermPhysicalBytes,
                &t.NumTimeTravelPhysicalBytes,
        }
        for _, counter := range counters {
                if *counter == "" {
                        *counter = "0"
                }
        }
}

// syncPatchedTableSchema pushes schema changes carried by a
// tables.patch body to the engine. The table's current schema is read
// with Catalog.DescribeTable, merged with the patch schema through
// load.MergeSchemasForTablePatch, and — only when the merge reports a
// change — applied with load.ApplySchemaUpdate.
//
// It is a no-op (nil) when no Catalog is configured or the patch has
// no schema fields. Errors from describe, merge or apply are returned
// unchanged.
func syncPatchedTableSchema(
        ctx context.Context,
        deps Dependencies,
        projectID, datasetID, tableID string,
        patchSchema *bqtypes.TableSchema,
) error {
        if deps.Catalog == nil {
                return nil
        }
        if patchSchema == nil || len(patchSchema.Fields) == 0 {
                return nil
        }
        target := &enginepb.TableRef{
                ProjectId: projectID,
                DatasetId: datasetID,
                TableId:   tableID,
        }
        desc, err := deps.Catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: target})
        if err != nil {
                return err
        }
        current := schemaFromProto(desc.GetSchema())
        merged, changed, err := load.MergeSchemasForTablePatch(current, patchSchema)
        if err != nil || !changed {
                // err is nil on the unchanged path, so this covers both exits.
                return err
        }
        if _, err = load.ApplySchemaUpdate(ctx, deps.Catalog, target, merged, load.TablePatchSchemaOptions); err != nil {
                return err
        }
        return nil
}

package handlers

import (
        "net/http"
        "strconv"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// registerInsertedTable hands a decoded tables.insert body to the
// engine, choosing the registration path from the body's contents:
// a non-empty view query goes to insertLogicalView, an external data
// configuration goes to insertExternalTable, and everything else is
// registered with Catalog.RegisterTable using the body's schema.
//
// Returns false when registration failed and an error response has
// already been written to w.
func registerInsertedTable(
        w http.ResponseWriter,
        r *http.Request,
        deps Dependencies,
        projectID, datasetID, tableID string,
        t *bqtypes.Table,
) bool {
        if view := t.View; view != nil && view.Query != "" {
                // Logical views go through the view-registration path
                // rather than RegisterTable. Per the design note on this
                // code, registering an empty backing table would shadow
                // the view in the engine catalog and make reads of it
                // return zero rows.
                return insertLogicalView(w, r, deps, projectID, datasetID, tableID, view.Query)
        }
        if t.ExternalDataConfiguration != nil {
                return insertExternalTable(w, r, deps, projectID, datasetID, tableID, t)
        }
        req := &enginepb.RegisterTableRequest{
                Table: &enginepb.TableRef{
                        ProjectId: projectID,
                        DatasetId: datasetID,
                        TableId:   tableID,
                },
                Schema: schemaToProto(t.Schema),
        }
        _, err := deps.Catalog.RegisterTable(r.Context(), req)
        return !grpcToHTTPError(w, err)
}

// writeInsertedTableResponse finishes a successful tables.insert. It
// applies the default collation to the schema, stores the Table in
// the REST metadata overlay, syncs column governance from the schema,
// records the creation timestamp with the snapshot store (when one is
// configured) and writes the stamped Table resource with status 200.
//
// The response's creationTime is set to the same timestamp that is
// handed to the snapshot store, unless the body already carried one.
// Previously tableResource took a second clock reading, so the insert
// response could disagree with the creation time recorded here. The
// overlay entry is stored before the timestamp is set and is not
// affected by it.
func writeInsertedTableResponse(
        w http.ResponseWriter,
        deps Dependencies,
        r *http.Request,
        projectID, datasetID, tableID string,
        t bqtypes.Table,
) {
        // Applied once; the stored overlay entry and the response share
        // the resulting schema.
        if t.DefaultCollation != "" {
                t.Schema = bqtypes.ApplyDefaultCollationToStringFields(t.Schema, t.DefaultCollation)
        }
        deps.Metadata.PutTable(projectID, datasetID, tableID, t)
        SyncColumnGovernanceFromSchema(r.Context(), deps, projectID, datasetID, tableID, t.Schema)
        created := nowMillis()
        if deps.Snapshots != nil {
                if ms, parseErr := strconv.ParseInt(created, 10, 64); parseErr == nil {
                        deps.Snapshots.RecordCreation(projectID, datasetID, tableID, ms)
                }
        }
        // t is a by-value copy, so stamping it here only affects the
        // response, not the overlay entry stored above.
        if t.CreationTime == "" {
                t.CreationTime = created
        }
        writeJSON(w, http.StatusOK, tableResource(projectID, datasetID, tableID, t))
}

package handlers

import (
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// schemaToProto translates a REST TableSchema into the gRPC
// TableSchema sent to the engine, converting each field with
// fieldToProto. A nil REST schema maps to a nil proto schema; a
// non-nil schema always yields a non-nil (possibly empty) field list.
func schemaToProto(s *bqtypes.TableSchema) *enginepb.TableSchema {
        if s == nil {
                return nil
        }
        fields := make([]*enginepb.FieldSchema, len(s.Fields))
        for i, field := range s.Fields {
                fields[i] = fieldToProto(field)
        }
        return &enginepb.TableSchema{Fields: fields}
}

// fieldToProto converts one REST TableFieldSchema into the gRPC
// FieldSchema, copying name, type, mode and description as-is and
// recursing into nested fields. A field without children keeps a nil
// Fields slice.
func fieldToProto(f bqtypes.TableFieldSchema) *enginepb.FieldSchema {
	var children []*enginepb.FieldSchema
	for _, sub := range f.Fields {
		children = append(children, fieldToProto(sub))
	}
	return &enginepb.FieldSchema{
		Name:        f.Name,
		Type:        f.Type,
		Mode:        f.Mode,
		Description: f.Description,
		Fields:      children,
	}
}

// schemaFromProto is the reverse of schemaToProto: it builds the REST
// TableSchema from a gRPC TableSchema. A nil schema, or one without
// fields, maps to nil so the JSON response omits the field.
func schemaFromProto(s *enginepb.TableSchema) *bqtypes.TableSchema {
	if s == nil {
		return nil
	}
	n := len(s.Fields)
	if n == 0 {
		return nil
	}
	fields := make([]bqtypes.TableFieldSchema, n)
	for i := range s.Fields {
		fields[i] = fieldFromProto(s.Fields[i])
	}
	return &bqtypes.TableSchema{Fields: fields}
}

// fieldFromProto converts one gRPC FieldSchema into the REST
// TableFieldSchema. The type is passed through normalizeRESTFieldType
// and STRUCT (any case) is reported as RECORD; the name is passed
// through normalizeRESTFieldName. Nested fields are converted
// recursively.
func fieldFromProto(f *enginepb.FieldSchema) bqtypes.TableFieldSchema {
	restType := normalizeRESTFieldType(f.GetType())
	if strings.EqualFold(restType, "STRUCT") {
		restType = "RECORD"
	}
	restName := normalizeRESTFieldName(f.GetName())

	var nested []bqtypes.TableFieldSchema
	for _, child := range f.GetFields() {
		nested = append(nested, fieldFromProto(child))
	}
	return bqtypes.TableFieldSchema{
		Name:        restName,
		Type:        restType,
		Mode:        f.GetMode(),
		Description: f.GetDescription(),
		Fields:      nested,
	}
}

// normalizeRESTFieldName rewrites analyzer-synthesized column names
// ($col1, $col2, …) to the zero-based f0_, f1_, … aliases the Node
// client expects for anonymous SELECT outputs (the
// queryParamsTimestamps sample reads row.f0_). Any other name,
// including "$col" followed by something that is not a positive
// integer, is returned unchanged.
func normalizeRESTFieldName(name string) string {
	const prefix = "$col"
	if len(name) <= len(prefix) || !strings.HasPrefix(name, prefix) {
		return name
	}
	ordinal, err := strconv.Atoi(name[len(prefix):])
	if err != nil || ordinal <= 0 {
		return name
	}
	return "f" + strconv.Itoa(ordinal-1) + "_"
}

// normalizeRESTFieldType maps a few GoogleSQL type spellings to their
// REST names: sqlTypeINT64 becomes sqlTypeINTEGER, FLOAT64 becomes
// FLOAT and BOOL becomes BOOLEAN. Matching ignores case and surrounding
// whitespace; any other input is returned exactly as given (untrimmed).
func normalizeRESTFieldType(t string) string {
	canonical := strings.ToUpper(strings.TrimSpace(t))
	if canonical == sqlTypeINT64 {
		return sqlTypeINTEGER
	}
	if canonical == "FLOAT64" {
		return "FLOAT"
	}
	if canonical == "BOOL" {
		return "BOOLEAN"
	}
	return t
}

// normalizeRESTTableSchema returns a copy of s whose field types, at
// every nesting level, have been passed through normalizeRESTFieldType.
// The input schema is not modified. A nil schema yields nil.
func normalizeRESTTableSchema(s *bqtypes.TableSchema) *bqtypes.TableSchema {
	if s == nil {
		return nil
	}
	normalized := make([]bqtypes.TableFieldSchema, 0, len(s.Fields))
	for _, field := range s.Fields {
		// field is a per-iteration copy, so mutating it leaves s intact.
		field.Type = normalizeRESTFieldType(field.Type)
		if len(field.Fields) > 0 {
			sub := normalizeRESTTableSchema(&bqtypes.TableSchema{Fields: field.Fields})
			if sub != nil {
				field.Fields = sub.Fields
			}
		}
		normalized = append(normalized, field)
	}
	result := *s
	result.Fields = normalized
	return &result
}

package handlers

// persistViewFromDDL parses CREATE [OR REPLACE] VIEW / CREATE [OR
// REPLACE] MATERIALIZED VIEW DDL and stores the resulting table in the
// gateway MetadataStore, so tables.list / tables.get surface the type
// and view.query (or materializedView.query) for views created through
// query jobs. SQL that does not parse as such DDL is ignored.
func persistViewFromDDL(
	deps *Dependencies,
	projectID, defaultDatasetID, sql string,
) {
	if view, parsed := parseCreateViewDDL(projectID, defaultDatasetID, sql); parsed {
		target := view.TableReference
		deps.Metadata.PutTable(target.ProjectID, target.DatasetID, target.TableID, view)
	}
}

// evictViewFromDDL deletes the view metadata that persistViewFromDDL
// stored, after DROP VIEW / DROP MATERIALIZED VIEW DDL. DROP VIEW
// arrives as statementType DROP_TABLE in the engine envelope, so
// parseDropViewDDL is what tells it apart from DROP TABLE; SQL it
// rejects leaves the store untouched.
func evictViewFromDDL(
	deps *Dependencies,
	projectID, defaultDatasetID, sql string,
	materializedOnly bool,
) {
	project, dataset, table, matched := parseDropViewDDL(projectID, defaultDatasetID, sql, materializedOnly)
	if matched {
		deps.Metadata.DeleteTable(project, dataset, table)
	}
}

// handleViewDDLAfterQuery mirrors routines/models DDL persistence for
// views created or dropped through jobs.query / jobs.insert query jobs.
// CREATE_VIEW and CREATE_MATERIALIZED_VIEW persist the view;
// DROP_MATERIALIZED_VIEW evicts materialized views only; DROP_TABLE
// evicts logical views only. Other statement types are ignored.
func handleViewDDLAfterQuery(
	deps *Dependencies,
	projectID, defaultDatasetID, sql, statementType string,
) {
	if statementType == "CREATE_VIEW" || statementType == "CREATE_MATERIALIZED_VIEW" {
		persistViewFromDDL(deps, projectID, defaultDatasetID, sql)
		return
	}
	if statementType == "DROP_MATERIALIZED_VIEW" || statementType == "DROP_TABLE" {
		materializedOnly := statementType == "DROP_MATERIALIZED_VIEW"
		evictViewFromDDL(deps, projectID, defaultDatasetID, sql, materializedOnly)
	}
}

package handlers

import (
        "strings"
        "unicode"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// parseCreateViewDDL extracts the target table reference and AS-query
// from CREATE [OR REPLACE] [MATERIALIZED] VIEW DDL.
//
// The view name is resolved with splitViewTableName: projectID fills in
// the project for one- and two-part names, and defaultDatasetID fills in
// the dataset for one-part names only.
//
// It returns the populated Table and true on success. A logical view
// gets Type viewTableType and View set; a materialized view gets Type
// materializedViewTableType and MaterializedView set. Any parse failure
// returns the zero Table and false.
func parseCreateViewDDL(projectID, defaultDatasetID, sql string) (bqtypes.Table, bool) {
	rest, materialized, ok := stripCreateViewHeader(sql)
	if !ok {
		return bqtypes.Table{}, false
	}
	name, rest, ok := parseViewQuotedName(rest)
	if !ok {
		return bqtypes.Table{}, false
	}
	query, ok := parseViewQueryFromRest(rest)
	if !ok {
		return bqtypes.Table{}, false
	}

	pID, dID, tID := splitViewTableName(projectID, defaultDatasetID, name)
	t := bqtypes.Table{
		TableReference: bqtypes.TableReference{
			ProjectID: pID,
			DatasetID: dID,
			TableID:   tID,
		},
	}
	if materialized {
		t.Type = materializedViewTableType
		t.MaterializedView = &bqtypes.MaterializedViewDefinition{Query: query}
	} else {
		t.Type = viewTableType
		t.View = &bqtypes.ViewDefinition{Query: query}
	}
	return t, true
}

// parseDropViewDDL extracts the target of DROP [MATERIALIZED] VIEW
// [IF EXISTS] DDL. The statement must be of the kind selected by
// materializedOnly: true accepts only materialized-view drops, false
// accepts only logical-view drops. On any mismatch or parse failure all
// results are zero and ok is false.
func parseDropViewDDL(
	projectID, defaultDatasetID, sql string,
	materializedOnly bool,
) (pID, dID, tID string, ok bool) {
	rest, materialized, matched := stripDropViewHeader(sql)
	if !matched || materialized != materializedOnly {
		return "", "", "", false
	}
	name, _, parsed := parseViewQuotedName(rest)
	if !parsed {
		return "", "", "", false
	}
	pID, dID, tID = splitViewTableName(projectID, defaultDatasetID, name)
	return pID, dID, tID, true
}

// stripCreateViewHeader consumes the leading
//
//	CREATE [OR REPLACE] [MATERIALIZED] VIEW [IF NOT EXISTS]
//
// keywords from sql and returns the remaining text (starting at the
// view name) with surrounding whitespace trimmed. materialized reports
// whether the MATERIALIZED keyword was present. ok is false when sql
// does not start with a CREATE VIEW header.
//
// Keywords are matched case-insensitively, may be separated by any
// amount of whitespace, and must end on a word boundary, so
// "CREATE VIEWS ..." is not treated as a view header.
func stripCreateViewHeader(sql string) (rest string, materialized bool, ok bool) {
	rest, ok = cutViewDDLKeywords(strings.TrimSpace(sql), "CREATE")
	if !ok {
		return "", false, false
	}
	// Optional clauses: cutViewDDLKeywords hands back its input
	// untouched when the keywords are absent.
	rest, _ = cutViewDDLKeywords(rest, "OR", "REPLACE")
	rest, materialized = cutViewDDLKeywords(rest, "MATERIALIZED")
	if rest, ok = cutViewDDLKeywords(rest, "VIEW"); !ok {
		return "", false, false
	}
	// IF is a reserved word, so an unquoted view cannot be named "IF";
	// consuming the clause here keeps it out of the parsed name.
	rest, _ = cutViewDDLKeywords(rest, "IF", "NOT", "EXISTS")
	return rest, materialized, true
}

// stripDropViewHeader consumes the leading
//
//	DROP [MATERIALIZED] VIEW [IF EXISTS]
//
// keywords from sql and returns the remaining text (starting at the
// view name) with surrounding whitespace trimmed. materialized reports
// whether the MATERIALIZED keyword was present. ok is false when sql
// does not start with a DROP VIEW header. Matching rules are the same
// as for stripCreateViewHeader.
func stripDropViewHeader(sql string) (rest string, materialized bool, ok bool) {
	rest, ok = cutViewDDLKeywords(strings.TrimSpace(sql), "DROP")
	if !ok {
		return "", false, false
	}
	rest, materialized = cutViewDDLKeywords(rest, "MATERIALIZED")
	if rest, ok = cutViewDDLKeywords(rest, "VIEW"); !ok {
		return "", false, false
	}
	rest, _ = cutViewDDLKeywords(rest, "IF", "EXISTS")
	return rest, materialized, true
}

// cutViewDDLKeywords strips the given ASCII keywords, in order, from
// the front of s. Keywords match case-insensitively, may be preceded by
// whitespace, and must not be followed directly by an identifier byte.
// On success it returns the whitespace-trimmed remainder and true; if
// any keyword is missing it returns s unchanged and false.
func cutViewDDLKeywords(s string, keywords ...string) (string, bool) {
	rest := s
	for _, kw := range keywords {
		rest = strings.TrimSpace(rest)
		// Comparing a same-byte-length slice of the original string
		// avoids the offset drift that slicing by an upper-cased copy
		// can cause when case mapping changes a rune's encoded width.
		if len(rest) < len(kw) || !strings.EqualFold(rest[:len(kw)], kw) {
			return s, false
		}
		if len(rest) > len(kw) && isViewDDLWordByte(rest[len(kw)]) {
			return s, false
		}
		rest = rest[len(kw):]
	}
	return strings.TrimSpace(rest), true
}

// isViewDDLWordByte reports whether b is an ASCII letter, digit, or
// underscore, i.e. a byte that would extend a keyword into a longer
// identifier.
func isViewDDLWordByte(b byte) bool {
	switch {
	case b == '_':
		return true
	case b >= '0' && b <= '9':
		return true
	case b >= 'a' && b <= 'z':
		return true
	case b >= 'A' && b <= 'Z':
		return true
	}
	return false
}

// parseViewQuotedName reads a (possibly dotted) view name from the
// front of s and returns it together with the trimmed remainder.
//
// Each dot-separated segment may be backtick-quoted on its own
// (`ds`.`v`, ds.`v`) or the whole path may sit inside one pair of
// backticks (`p.d.v`). Backticks are removed and the segments are
// re-joined with ".", so the result can be handed to a plain
// dot-splitter. An unquoted segment ends at whitespace, ".", a
// backtick, "(" (start of a column list) or ";" (statement
// terminator), so `DROP VIEW ds.v;` yields "ds.v" rather than "ds.v;".
//
// ok is false for empty input, an unterminated backtick, or an empty
// segment.
func parseViewQuotedName(s string) (name, rest string, ok bool) {
	s = strings.TrimSpace(s)
	var parts []string
	for {
		var part string
		if strings.HasPrefix(s, "`") {
			end := strings.IndexByte(s[1:], '`')
			if end < 0 {
				return "", "", false
			}
			part, s = s[1:end+1], s[end+2:]
		} else {
			// Scan rune-wise so a multi-byte character is never split.
			end := strings.IndexFunc(s, func(r rune) bool {
				return unicode.IsSpace(r) || strings.ContainsRune(".`(;", r)
			})
			if end < 0 {
				end = len(s)
			}
			part, s = s[:end], s[end:]
		}
		if part == "" {
			return "", "", false
		}
		parts = append(parts, part)
		if !strings.HasPrefix(s, ".") {
			break
		}
		s = s[1:] // step over the separator and read the next segment
	}
	return strings.Join(parts, "."), strings.TrimSpace(s), true
}

// splitViewTableName resolves a dotted view name into project, dataset
// and table IDs.
//
//   - "table" uses projectID and defaultDatasetID.
//   - "dataset.table" uses projectID.
//   - With three or more parts the last part is the table, the
//     second-to-last is the dataset, and everything before that is the
//     project, so a project ID that itself contains dots (for example a
//     domain-scoped "example.com:proj") stays intact.
func splitViewTableName(projectID, defaultDatasetID, name string) (project, dataset, table string) {
	parts := strings.Split(name, ".")
	n := len(parts)
	switch n {
	case 1:
		return projectID, defaultDatasetID, parts[0]
	case 2:
		return projectID, parts[0], parts[1]
	default:
		return strings.Join(parts[:n-2], "."), parts[n-2], parts[n-1]
	}
}

// parseViewQueryFromRest takes the DDL text that follows the view name
// and returns the query after the first top-level AS keyword, trimmed.
// A leading OPTIONS(...) clause is skipped first. The second result is
// false when no top-level AS is found or nothing follows it.
func parseViewQueryFromRest(rest string) (string, bool) {
	body := strings.TrimSpace(skipViewOptionsClause(strings.TrimSpace(rest)))
	after, found := findTopLevelAS(body)
	if !found {
		return "", false
	}
	if query := strings.TrimSpace(body[after:]); query != "" {
		return query, true
	}
	return "", false
}

// skipViewOptionsClause removes any leading OPTIONS(...) clauses from
// rest and returns what follows, trimmed. The keyword is matched
// case-insensitively and whitespace is allowed between OPTIONS and the
// opening parenthesis. If the text does not start with such a clause,
// or its parentheses are unbalanced, rest is returned trimmed but
// otherwise unchanged from that point.
func skipViewOptionsClause(rest string) string {
	const keyword = "OPTIONS"
	rest = strings.TrimSpace(rest)
	for len(rest) >= len(keyword) && strings.EqualFold(rest[:len(keyword)], keyword) {
		// Requiring "(" right after the keyword (modulo whitespace)
		// keeps identifiers that merely start with "options" intact.
		args := strings.TrimSpace(rest[len(keyword):])
		if !strings.HasPrefix(args, "(") {
			break
		}
		_, tail, ok := scanViewBalanced(args, '(', ')')
		if !ok {
			break
		}
		rest = strings.TrimSpace(tail)
	}
	return rest
}

// findTopLevelAS scans s byte by byte for the first AS keyword that is
// not inside a quoted string, parentheses/square brackets, or angle
// brackets. It returns the byte offset just past the keyword (i+2) and
// true, or 0 and false when no such keyword exists.
//
// Quotes may be ', " or `; inside a quote a backslash skips the next
// byte. Keyword detection is delegated to isViewASKeywordAt.
func findTopLevelAS(s string) (after int, ok bool) {
        depth := 0         // nesting level of ( and [
        angle := 0         // nesting level of <
        inQuote := byte(0) // active quote character, 0 when outside a quote
        for i := 0; i < len(s); i++ {
                c := s[i]
                if inQuote != 0 {
                        // Skip the byte following a backslash so an escaped
                        // quote does not terminate the literal.
                        if c == '\\' && i+1 < len(s) {
                                i++
                                continue
                        }
                        if c == inQuote {
                                inQuote = 0
                        }
                        continue
                }
                switch c {
                case '\'', '"', '`':
                        inQuote = c
                case '<':
                        angle++
                case '>':
                        // Unmatched closers are ignored rather than driving
                        // the counter negative.
                        if angle > 0 {
                                angle--
                        }
                case '(', '[':
                        depth++
                case ')', ']':
                        if depth > 0 {
                                depth--
                        }
                }
                // The check runs after the counters are updated for this byte.
                if depth == 0 && angle == 0 && isViewASKeywordAt(s, i) {
                        return i + 2, true
                }
        }
        return 0, false
}

// isViewASKeywordAt reports whether the two bytes of s starting at
// index i spell "AS" (case-insensitively) as a standalone word, i.e.
// neither the byte before nor the byte after is an identifier
// character according to isViewIdentChar.
func isViewASKeywordAt(s string, i int) bool {
	end := i + 2
	if end > len(s) {
		return false
	}
	if !strings.EqualFold(s[i:end], "AS") {
		return false
	}
	precededByIdent := i > 0 && isViewIdentChar(s[i-1])
	followedByIdent := end < len(s) && isViewIdentChar(s[end])
	return !precededByIdent && !followedByIdent
}

// isViewIdentChar reports whether b is an underscore or, when the byte
// value is read as a Unicode code point, a letter or a decimal digit.
func isViewIdentChar(b byte) bool {
	if b == '_' {
		return true
	}
	r := rune(b)
	return unicode.IsDigit(r) || unicode.IsLetter(r)
}

// scanViewBalanced expects s to begin with the open delimiter and
// walks forward to the matching close delimiter, honouring nesting.
// Delimiters inside ', " or ` quotes are ignored, and within a quote a
// backslash skips the following byte. On success it returns the text
// between the outer delimiters, the trimmed text after the closing
// delimiter, and true. It returns empty strings and false when s does
// not start with open or the delimiters never balance.
func scanViewBalanced(s string, open, close byte) (inner, rest string, ok bool) {
	if s == "" || s[0] != open {
		return "", "", false
	}
	var (
		quote   byte
		nesting int
	)
	for pos := 0; pos < len(s); pos++ {
		ch := s[pos]
		switch {
		case quote != 0:
			if ch == '\\' && pos+1 < len(s) {
				pos++
			} else if ch == quote {
				quote = 0
			}
		case ch == '\'' || ch == '"' || ch == '`':
			quote = ch
		case ch == open:
			nesting++
		case ch == close:
			nesting--
			if nesting == 0 {
				return s[1:pos], strings.TrimSpace(s[pos+1:]), true
			}
		}
	}
	return "", "", false
}

package jobs

import (
        "strconv"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// InfoSchemaJobsDataset is the internal dataset used when rewriting
// INFORMATION_SCHEMA.JOBS* queries to a catalog table the engine can scan.
const InfoSchemaJobsDataset = "_bqemu_jobs"

// InfoSchemaJobsTable is the table id holding materialized job rows.
const InfoSchemaJobsTable = "JOBS"

const (
        infoSchemaColProjectID  = "project_id"
        infoSchemaTypeString    = "STRING"
        infoSchemaTypeTimestamp = "TIMESTAMP"
        infoSchemaTypeInt64     = "INT64"
        infoSchemaTypeStruct    = "STRUCT"
)

// InfoSchemaJobsSchema returns the column layout of the materialized
// JOBS view: scalar job attributes followed by the destination_table,
// error_result and dml_statistics STRUCT columns. A fresh schema value
// is built on every call.
func InfoSchemaJobsSchema() *enginepb.TableSchema {
	column := func(name, typ string) *enginepb.FieldSchema {
		return &enginepb.FieldSchema{Name: name, Type: typ}
	}
	str := func(name string) *enginepb.FieldSchema { return column(name, infoSchemaTypeString) }
	ts := func(name string) *enginepb.FieldSchema { return column(name, infoSchemaTypeTimestamp) }
	i64 := func(name string) *enginepb.FieldSchema { return column(name, infoSchemaTypeInt64) }
	record := func(name string, children ...*enginepb.FieldSchema) *enginepb.FieldSchema {
		return &enginepb.FieldSchema{Name: name, Type: infoSchemaTypeStruct, Fields: children}
	}

	return &enginepb.TableSchema{Fields: []*enginepb.FieldSchema{
		str("job_id"),
		ts("creation_time"),
		ts("start_time"),
		ts("end_time"),
		str("state"),
		str("job_type"),
		str(infoSchemaColProjectID),
		str("query"),
		str("statement_type"),
		str("user_email"),
		str("parent_job_id"),
		i64("total_bytes_processed"),
		column("cache_hit", "BOOL"),
		record("destination_table",
			str(infoSchemaColProjectID),
			str("dataset_id"),
			str("table_id"),
		),
		record("error_result",
			str("reason"),
			str("message"),
		),
		record("dml_statistics",
			i64("inserted_row_count"),
			i64("deleted_row_count"),
			i64("updated_row_count"),
		),
	}}
}

// InfoSchemaJobRows turns the registry's jobs for projectID into map
// rows keyed by the column names of InfoSchemaJobsSchema. A nil
// registry yields nil. The second result of ListByProject is discarded.
func InfoSchemaJobRows(reg *Registry, projectID string) []map[string]any {
	if reg == nil {
		return nil
	}
	listed, _ := reg.ListByProject(projectID, ListOptions{})
	rows := make([]map[string]any, len(listed))
	for i, job := range listed {
		rows[i] = infoSchemaRowFromJob(job)
	}
	return rows
}

// infoSchemaRowFromJob builds one JOBS row from j. Scalar columns are
// always present (cache_hit is always false); the destination_table,
// error_result and dml_statistics struct columns are added only when
// the job carries the corresponding data. A nil job yields an empty,
// non-nil map.
func infoSchemaRowFromJob(j *Job) map[string]any {
	row := map[string]any{}
	if j == nil {
		return row
	}

	row["job_id"] = j.JobReference.JobID
	row["creation_time"] = millisToTimestamp(j.Statistics.CreationTime)
	row["start_time"] = millisToTimestamp(j.Statistics.StartTime)
	row["end_time"] = millisToTimestamp(j.Statistics.EndTime)
	row["state"] = j.Status.State
	row["job_type"] = jobTypeFromConfiguration(j.Configuration)
	row[infoSchemaColProjectID] = j.JobReference.ProjectID
	row["query"] = queryTextFromConfiguration(j.Configuration)
	row["statement_type"] = statementTypeFromJob(j)
	row["user_email"] = j.UserEmail
	row["parent_job_id"] = parentJobID(j)
	row["total_bytes_processed"] = parseInt64OrZero(j.Statistics.TotalBytesProcessed)
	row["cache_hit"] = false

	if dest := destinationTableFromConfiguration(j.Configuration); dest != nil {
		row["destination_table"] = dest
	}
	if failure := j.Status.ErrorResult; failure != nil {
		row["error_result"] = map[string]any{
			"reason":  failure.Reason,
			"message": failure.Message,
		}
	}
	if dml := dmlStatsFromJob(j); dml != nil {
		row["dml_statistics"] = dml
	}
	return row
}

// parentJobID returns the job's ParentJobID, falling back to the value
// recorded under Statistics when the top-level field is empty.
func parentJobID(j *Job) string {
	if id := j.ParentJobID; id != "" {
		return id
	}
	return j.Statistics.ParentJobID
}

// jobTypeFromConfiguration returns the job type for cfg. An explicit
// JobType wins and is upper-cased; otherwise the type is inferred from
// the first non-nil sub-configuration in the order Query, Load, Copy,
// Extract. A nil configuration, or one with none of these set, yields
// the empty string.
func jobTypeFromConfiguration(cfg *JobConfiguration) string {
	if cfg == nil {
		return ""
	}
	if explicit := cfg.JobType; explicit != "" {
		return strings.ToUpper(explicit)
	}
	if cfg.Query != nil {
		return "QUERY"
	}
	if cfg.Load != nil {
		return "LOAD"
	}
	if cfg.Copy != nil {
		return "COPY"
	}
	if cfg.Extract != nil {
		return "EXTRACT"
	}
	return ""
}

// queryTextFromConfiguration returns the SQL text of a query job, or
// the empty string when cfg is nil or has no query configuration.
func queryTextFromConfiguration(cfg *JobConfiguration) string {
	if cfg != nil && cfg.Query != nil {
		return cfg.Query.Query
	}
	return ""
}

// statementTypeFromJob returns the job's statement type, preferring the
// value on j.Result and falling back to j.Statistics.Query. It returns
// the empty string when neither carries one.
func statementTypeFromJob(j *Job) string {
	if res := j.Result; res != nil && res.StatementType != "" {
		return res.StatementType
	}
	if q := j.Statistics.Query; q != nil && q.StatementType != "" {
		return q.StatementType
	}
	return ""
}

// destinationTableFromConfiguration returns the destination table of
// cfg as a project_id / dataset_id / table_id map. The query, load and
// copy sub-configurations are consulted in that order and the first
// one with a destination table wins. It returns nil when cfg is nil or
// no destination table is set.
func destinationTableFromConfiguration(cfg *JobConfiguration) map[string]any {
	if cfg == nil {
		return nil
	}
	var ref *bqtypes.TableReference
	if q := cfg.Query; q != nil && q.DestinationTable != nil {
		ref = q.DestinationTable
	} else if l := cfg.Load; l != nil && l.DestinationTable != nil {
		ref = l.DestinationTable
	} else if c := cfg.Copy; c != nil && c.DestinationTable != nil {
		ref = c.DestinationTable
	}
	if ref == nil {
		return nil
	}
	return map[string]any{
		infoSchemaColProjectID: ref.ProjectID,
		"dataset_id":           ref.DatasetID,
		"table_id":             ref.TableID,
	}
}

// dmlStatsFromJob returns the job's DML row counts as a map of int64
// values, or nil when the job has no result or no DML statistics.
// Counts that are empty or not valid integers are reported as 0.
func dmlStatsFromJob(j *Job) map[string]any {
	if j.Result == nil || j.Result.DmlStats == nil {
		return nil
	}
	stats := j.Result.DmlStats
	return map[string]any{
		"inserted_row_count": parseInt64OrZero(stats.InsertedRowCount),
		"deleted_row_count":  parseInt64OrZero(stats.DeletedRowCount),
		"updated_row_count":  parseInt64OrZero(stats.UpdatedRowCount),
	}
}

// millisToTimestamp converts a decimal string of Unix milliseconds
// into a UTC timestamp string formatted as
// "2006-01-02 15:04:05.999999 UTC" (trailing fractional zeros are
// dropped). It returns nil when ms is blank or is not a valid base-10
// int64.
func millisToTimestamp(ms string) any {
	const layout = "2006-01-02 15:04:05.999999 UTC"
	if strings.TrimSpace(ms) == "" {
		return nil
	}
	if millis, err := strconv.ParseInt(ms, 10, 64); err == nil {
		return time.UnixMilli(millis).UTC().Format(layout)
	}
	return nil
}

// parseInt64OrZero parses s as a base-10 int64 and returns 0 when s is
// blank or is not a valid integer.
func parseInt64OrZero(s string) int64 {
	if strings.TrimSpace(s) != "" {
		if value, err := strconv.ParseInt(s, 10, 64); err == nil {
			return value
		}
	}
	return 0
}

// Package jobs is the gateway-side, in-memory record of every
// BigQuery job the emulator has accepted in this process. It feeds
// the synchronous `jobs.query` response (the `jobReference` and
// timing statistics it must emit alongside rows) and is the source
// of truth `jobs.get` / `jobs.list` will read from once those land.
//
// Scope today:
//
//   - One process-local Registry per gateway. State is volatile;
//     restarts wipe the table. Spanner-emulator does the same with
//     its in-memory metadata catalog.
//   - Jobs are minted by `jobs.query` (the sync query API) and the
//     sync slice of `jobs.insert` (query / load / copy / extract).
//     Load / copy / extract insert paths dispatch and round-trip
//     configuration but defer byte-level work to plans tp08-04/05.
//   - Jobs are recorded as `DONE` straight away. The emulator runs
//     each query synchronously, so a pending/running window never
//     exists on the wire from the caller's perspective. Async
//     execution lands later when DML / long-running jobs need real
//     lifecycle transitions.
//
// The shape of `Job`, `Status`, and `Statistics` mirrors the subset of
// `https://docs.cloud.google.com/bigquery/docs/reference/rest/v2/jobs#Job`
// the emulator emits today. JSON tags match the upstream wire field
// names so a stored `*Job` round-trips through `jobs.get` without an
// extra translation layer.
package jobs

import (
        "encoding/json"
        "fmt"
        "slices"
        "strconv"
        "strings"
        "sync"
        "sync/atomic"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// JobKind is the value of the `kind` field on a BigQuery Job
// resource. Stable across all job types (query / load / copy /
// extract); the per-configuration discriminator lives under
// `configuration.{query,load,...}` on a real Job, which the
// emulator does not populate yet.
const JobKind = "bigquery#job"

// JobState mirrors the upstream `Job.status.state` enum: PENDING
// (admitted but not yet scheduled), RUNNING (work in progress),
// DONE (terminal -- success or failure determined by `errorResult`).
const (
        JobStatePending = "PENDING"
        JobStateRunning = "RUNNING"
        JobStateDone    = "DONE"
)

// stateFilterAliases maps the lowercase wire spelling BigQuery
// accepts on `?stateFilter=` query parameters to the canonical
// upper-case `Status.State` value stored in the registry. The
// upstream API documents the parameter values as `pending`,
// `running`, and `done`; the response stamps the uppercase variant.
// Centralized here so `ListByProject` and tests share one source.
var stateFilterAliases = map[string]string{
        "pending": JobStatePending,
        "running": JobStateRunning,
        "done":    JobStateDone,
}

// Status mirrors the upstream `JobStatus` resource. ErrorResult is
// populated only when the job terminated with an error; Errors is
// a (potentially empty) list of warnings/errors collected during
// execution. Both are kept omitempty so a successful `jobs.query`
// reply doesn't carry empty arrays / null sentinel objects.
//
// CancelRequested mirrors the upstream `JobStatus.cancelRequested`
// flag the `JobCancel` handler stamps on the response. The gateway
// runs every job synchronously today so the flag flips to true at
// the same instant the entry's state moves to DONE; the field is
// omitempty so jobs that were never cancelled keep the same compact
// wire shape `jobs.query` emits.
type Status struct {
        // State is the lifecycle state; always serialized (no omitempty).
        // See the JobState* constants for the upstream enum values.
        State           string               `json:"state"`
        // ErrorResult is the terminal error; omitted from JSON when nil.
        ErrorResult     *bqtypes.ErrorProto  `json:"errorResult,omitempty"`
        // Errors lists warnings/errors from execution; omitted when empty.
        Errors          []bqtypes.ErrorProto `json:"errors,omitempty"`
        // CancelRequested is omitted from JSON while false.
        CancelRequested bool                 `json:"cancelRequested,omitempty"`
}

// JobConfiguration mirrors the subset of the upstream
// `JobConfiguration` resource the gateway round-trips through the
// registry. The per-type sub-objects (`Query`, `Load`, `Copy`,
// `Extract`) are the dispatch discriminator at `jobs.insert` time;
// everything else round-trips opaquely so a subsequent `jobs.get`
// echoes back the same shape the caller posted.
//
// Every field is omitempty, so an empty configuration marshals to `{}`.
type JobConfiguration struct {
        JobType string                   `json:"jobType,omitempty"` // QUERY | LOAD | COPY | EXTRACT
        // Per-type sub-configurations; nil when not posted.
        Query   *JobConfigurationQuery   `json:"query,omitempty"`
        Load    *JobConfigurationLoad    `json:"load,omitempty"`
        Copy    *JobConfigurationCopy    `json:"copy,omitempty"`
        Extract *JobConfigurationExtract `json:"extract,omitempty"`
        // Labels are key/value pairs serialized under "labels".
        Labels  map[string]string        `json:"labels,omitempty"`
        // DryRun is serialized under "dryRun" and omitted while false.
        DryRun  bool                     `json:"dryRun,omitempty"`
}

// JobConfigurationQuery is the per-query slice of a JobConfiguration.
// It models the query text plus the options listed below (default
// dataset, parameters, external table definitions, destination table
// and its write/partitioning/clustering/encryption settings, session
// and connection properties); the rest of the upstream long tail is
// deferred until a handler reads it.
//
// JSON decoding goes through the custom UnmarshalJSON on this type,
// which accepts writeDisposition as a string or a one-element array.
type JobConfigurationQuery struct {
        // Query is the SQL text; always serialized (no omitempty).
        Query                              string                                       `json:"query"`
        DefaultDataset                     *bqtypes.DatasetReference                    `json:"defaultDataset,omitempty"`
        // UseLegacySQL is a pointer so an absent value can be told apart
        // from an explicit false.
        UseLegacySQL                       *bool                                        `json:"useLegacySql,omitempty"`
        ParameterMode                      string                                       `json:"parameterMode,omitempty"`
        QueryParameters                    []bqtypes.QueryParameter                     `json:"queryParameters,omitempty"`
        TableDefinitions                   map[string]bqtypes.ExternalDataConfiguration `json:"tableDefinitions,omitempty"`
        DestinationTable                   *bqtypes.TableReference                      `json:"destinationTable,omitempty"`
        WriteDisposition                   string                                       `json:"writeDisposition,omitempty"`
        SchemaUpdateOptions                []string                                     `json:"schemaUpdateOptions,omitempty"`
        Clustering                         *bqtypes.Clustering                          `json:"clustering,omitempty"`
        TimePartitioning                   *bqtypes.TimePartitioning                    `json:"timePartitioning,omitempty"`
        DestinationEncryptionConfiguration *bqtypes.EncryptionConfiguration             `json:"destinationEncryptionConfiguration,omitempty"`
        CreateSession                      bool                                         `json:"createSession,omitempty"`
        ConnectionProperties               []bqtypes.ConnectionProperty                 `json:"connectionProperties,omitempty"`
}

// UnmarshalJSON decodes a query configuration, accepting
// writeDisposition either as a JSON string or as a one-element string
// array (node relaxColumnQueryAppend sample). When the key is absent
// WriteDisposition stays empty.
func (c *JobConfigurationQuery) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, so decoding into it
	// does not recurse into this UnmarshalJSON.
	type plain JobConfigurationQuery
	wire := struct {
		plain
		// Shadows the embedded string field so the raw JSON is captured.
		WriteDisposition json.RawMessage `json:"writeDisposition,omitempty"`
	}{}
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}
	*c = JobConfigurationQuery(wire.plain)
	if len(wire.WriteDisposition) > 0 {
		disposition, err := bqtypes.UnmarshalWriteDisposition(wire.WriteDisposition)
		if err != nil {
			return err
		}
		c.WriteDisposition = disposition
	}
	return nil
}

// JobConfigurationLoad is the per-load slice of a JobConfiguration.
// Fields mirror the minimum upstream REST shape thirdparty samples
// exercise; format readers and GCS byte I/O land in plan tp08-04.
//
// JSON decoding goes through the custom UnmarshalJSON on this type,
// which also fills the unexported skipLeadingRows field.
type JobConfigurationLoad struct {
        SourceURIs                         []string                         `json:"sourceUris,omitempty"`
        DestinationTable                   *bqtypes.TableReference          `json:"destinationTable,omitempty"`
        SourceFormat                       string                           `json:"sourceFormat,omitempty"`
        WriteDisposition                   string                           `json:"writeDisposition,omitempty"`
        Schema                             *bqtypes.TableSchema             `json:"schema,omitempty"`
        Autodetect                         bool                             `json:"autodetect,omitempty"`
        SchemaUpdateOptions                []string                         `json:"schemaUpdateOptions,omitempty"`
        DestinationEncryptionConfiguration *bqtypes.EncryptionConfiguration `json:"destinationEncryptionConfiguration,omitempty"`
        Clustering                         *bqtypes.Clustering              `json:"clustering,omitempty"`
        TimePartitioning                   *bqtypes.TimePartitioning        `json:"timePartitioning,omitempty"`
        HivePartitioningOptions            *bqtypes.HivePartitioningOptions `json:"hivePartitioningOptions,omitempty"`
        // Unexported, so the default encoding/json marshalling skips it;
        // read it through SkipLeadingRows().
        skipLeadingRows                    int                              // set via UnmarshalJSON; REST sends int or string
}

// SkipLeadingRows reports how many leading CSV rows to skip. It is
// safe to call on a nil receiver, which reports 0.
func (c *JobConfigurationLoad) SkipLeadingRows() int {
	if c != nil {
		return c.skipLeadingRows
	}
	return 0
}

// UnmarshalJSON decodes a load configuration. skipLeadingRows may be a
// JSON number or a decimal string, matching the official Python/Node
// client wire shape; any other JSON type is an error. writeDisposition
// may be a string or a one-element string array, and only a non-empty
// decoded value is stored.
func (c *JobConfigurationLoad) UnmarshalJSON(data []byte) error {
	// plain has the same fields but no methods, so decoding into it
	// does not recurse into this UnmarshalJSON.
	type plain JobConfigurationLoad
	wire := struct {
		plain
		SkipLeadingRows  any             `json:"skipLeadingRows,omitempty"`
		WriteDisposition json.RawMessage `json:"writeDisposition,omitempty"`
	}{}
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}
	*c = JobConfigurationLoad(wire.plain)

	disposition, err := bqtypes.UnmarshalWriteDisposition(wire.WriteDisposition)
	if err != nil {
		return err
	}
	if disposition != "" {
		c.WriteDisposition = disposition
	}

	switch v := wire.SkipLeadingRows.(type) {
	case nil:
		// Key absent or JSON null: keep the zero value.
	case float64:
		c.skipLeadingRows = int(v)
	case string:
		n, convErr := strconv.Atoi(v)
		if convErr != nil {
			return fmt.Errorf("skipLeadingRows: %w", convErr)
		}
		c.skipLeadingRows = n
	default:
		return fmt.Errorf("skipLeadingRows: unsupported type %T", v)
	}
	return nil
}

// UnmarshalJSON decodes a copy configuration, letting writeDisposition
// arrive either as a plain string or as a one-element string array.
// The receiver is overwritten with the decoded fields, and its
// WriteDisposition is then set to whatever
// bqtypes.UnmarshalWriteDisposition extracts from the raw value.
func (c *JobConfigurationCopy) UnmarshalJSON(data []byte) error {
        // plain has the same fields without the methods, which keeps the
        // nested json.Unmarshal from re-entering this function.
        type plain JobConfigurationCopy
        decoded := struct {
                plain
                WriteDisposition json.RawMessage `json:"writeDisposition,omitempty"`
        }{}
        if decodeErr := json.Unmarshal(data, &decoded); decodeErr != nil {
                return decodeErr
        }
        *c = JobConfigurationCopy(decoded.plain)

        disposition, dispErr := bqtypes.UnmarshalWriteDisposition(decoded.WriteDisposition)
        if dispErr == nil {
                c.WriteDisposition = disposition
        }
        return dispErr
}

// JobConfigurationCopy is the per-copy slice of a JobConfiguration.
// It declares both a single SourceTable and a SourceTables list.
// WriteDisposition is decoded by the type's custom UnmarshalJSON,
// which accepts a string or a one-element string array.
type JobConfigurationCopy struct {
        SourceTable                        *bqtypes.TableReference          `json:"sourceTable,omitempty"`
        SourceTables                       []bqtypes.TableReference         `json:"sourceTables,omitempty"`
        DestinationTable                   *bqtypes.TableReference          `json:"destinationTable,omitempty"`
        WriteDisposition                   string                           `json:"writeDisposition,omitempty"`
        CreateDisposition                  string                           `json:"createDisposition,omitempty"`
        DestinationEncryptionConfiguration *bqtypes.EncryptionConfiguration `json:"destinationEncryptionConfiguration,omitempty"`
        // OperationType is COPY (default), SNAPSHOT, RESTORE, or CLONE per
        // BigQuery JobConfigurationTableCopy.operationType.
        OperationType string `json:"operationType,omitempty"`
        // DestinationExpirationTime is epoch milliseconds when the destination
        // table expires (decimal string on the wire).
        DestinationExpirationTime string `json:"destinationExpirationTime,omitempty"`
}

// JobConfigurationExtract is the per-extract slice of a JobConfiguration.
// It names the table to export, the destination URIs to write to, and
// the output format and compression, all as plain strings. Every field
// is omitted from the JSON encoding when empty.
type JobConfigurationExtract struct {
        SourceTable       *bqtypes.TableReference `json:"sourceTable,omitempty"`
        DestinationURIs   []string                `json:"destinationUris,omitempty"`
        DestinationFormat string                  `json:"destinationFormat,omitempty"`
        Compression       string                  `json:"compression,omitempty"`
}

// Statistics mirrors the subset of `JobStatistics` the emulator
// currently fills in. All four timestamp / byte fields are decimal
// strings on the wire per
// docs/bigquery/docs/reference/rest/v2/jobs/get.md#JobStatistics --
// even `totalBytesProcessed`, because BigQuery REST never emits
// 64-bit integers as JSON numbers (clients use `string` decoders).
//
// ParentJobID and NumChildJobs are likewise typed as strings. The
// Query / Load / Copy / Extract pointers are the per-job-type
// sub-objects; each is omitted from the encoding when nil.
type Statistics struct {
        CreationTime        string                  `json:"creationTime,omitempty"`
        StartTime           string                  `json:"startTime,omitempty"`
        EndTime             string                  `json:"endTime,omitempty"`
        TotalBytesProcessed string                  `json:"totalBytesProcessed,omitempty"`
        ParentJobID         string                  `json:"parentJobId,omitempty"`
        NumChildJobs        string                  `json:"numChildJobs,omitempty"`
        SessionInfo         *bqtypes.SessionInfo    `json:"sessionInfo,omitempty"`
        Query               *bqtypes.JobStatistics2 `json:"query,omitempty"`
        Load                *LoadStatistics         `json:"load,omitempty"`
        Copy                *CopyStatistics         `json:"copy,omitempty"`
        Extract             *ExtractStatistics      `json:"extract,omitempty"`
}

// LoadStatistics mirrors upstream `JobStatistics3` (statistics.load).
// Every counter is typed as a string here and is omitted from the JSON
// encoding when empty.
type LoadStatistics struct {
        InputFiles     string `json:"inputFiles,omitempty"`
        InputFileBytes string `json:"inputFileBytes,omitempty"`
        OutputRows     string `json:"outputRows,omitempty"`
        OutputBytes    string `json:"outputBytes,omitempty"`
        BadRecords     string `json:"badRecords,omitempty"`
}

// CopyStatistics mirrors upstream `CopyJobStatistics` (statistics.copy).
// Both counters are typed as strings and are omitted from the JSON
// encoding when empty.
type CopyStatistics struct {
        CopiedRows         string `json:"copiedRows,omitempty"`
        CopiedLogicalBytes string `json:"copiedLogicalBytes,omitempty"`
}

// ExtractStatistics mirrors upstream `JobStatistics4` (statistics.extract).
// DestinationURIFileCounts is a list of strings and InputBytes a single
// string; both are omitted from the JSON encoding when empty.
type ExtractStatistics struct {
        DestinationURIFileCounts []string `json:"destinationUriFileCounts,omitempty"`
        InputBytes               string   `json:"inputBytes,omitempty"`
}

// QueryResult is the cached result of a synchronous query, kept in
// the registry so a follow-up `jobs.getQueryResults` can replay the
// same schema and rows without re-running the SQL. Schema and Rows
// are stored in the BigQuery REST `f`/`v` shape so the handler can
// emit them verbatim.
//
// The registry holds the entire result set in memory; this matches
// the "single-page only" charter from
// `docs/ENGINE_POLICY.md`. Pagination
// (real `pageToken` lifecycle, cursored reads from a streaming
// engine) is deferred until long-running jobs land.
//
// The struct carries no JSON tags; it reaches callers through
// `Job.Result`, which is itself excluded from the Job encoding.
type QueryResult struct {
        // Schema describes the result columns.
        Schema *bqtypes.TableSchema
        // Rows is the complete result set (no paging, see above).
        Rows []bqtypes.Row
        // DmlStats is non-nil for an INSERT/UPDATE/DELETE/MERGE job and
        // nil for a SELECT/DDL job. When set, `jobs.getQueryResults`
        // surfaces the same `dmlStats` + `numDmlAffectedRows` envelope
        // the synchronous `jobs.query` response carried, so polling
        // BigQuery clients (e.g. the Go SDK's `JobIterator`) see the
        // row counts on the replay too.
        DmlStats *bqtypes.DmlStats
        // StatementType is the canonical BigQuery REST statement-type
        // string the engine trailed on the `jobs.query` response (e.g.
        // `SELECT`, `INSERT`, `CREATE_TABLE`). Stashed on the cached
        // result so `jobs.getQueryResults` can re-surface the same
        // `Job.statistics.query.statementType` envelope on the replay
        // without re-running the SQL.
        StatementType string
        // EmulatorRoute is the canonical lowercase-snake disposition
        // string the C++ coordinator's `RouteClassifier` chose for the
        // original query (`duckdb_native`, `semantic_executor`,
        // `control_op`, ...). It is an emulator-internal debug field;
        // `jobs.getQueryResults` only surfaces it to loopback callers
        // (the call site enforces the gating via
        // `middleware.IsLoopback`) so the public REST shape stays the
        // same.
        EmulatorRoute string
        // EmulatorPhases carries per-phase timings (microseconds) from the
        // engine's phase_timings trailer for loopback replay.
        EmulatorPhases map[string]int64
        // DdlTargetRoutine is set when a CREATE_FUNCTION /
        // CREATE_PROCEDURE DDL statement registers a routine.
        DdlTargetRoutine *bqtypes.RoutineReference
}

// Job is the gateway's view of a single BigQuery job. Today it's
// populated from the sync `jobs.query` path and the sync-query slice
// of `jobs.insert`.
//
// NOTE(review): an earlier revision of this comment said the per-type
// `*Statistics` sub-objects were deferred; the Statistics type in this
// file already declares Load / Copy / Extract pointers -- confirm
// which handlers populate them.
//
// Result is the cached query result, populated by
// `CompleteQueryWithResult` and consumed by `jobs.getQueryResults`.
// It is excluded from the JSON encoding because the upstream Job
// resource has no rows/schema field; result data is only emitted
// through the dedicated `QueryResponse`/`GetQueryResultsResponse`
// shapes.
//
// ParentJobID is non-empty for script statement-level jobs spawned
// under a scripting parent, mirroring upstream's
// `Job.statistics.parentJobId`. `JobDelete` cascades by removing
// every entry whose ParentJobID matches the requested jobId so a
// scripting parent's children disappear in one call.
//
// CancelRequested mirrors the upstream `Job.status.cancelRequested`
// flag the `JobCancel` handler stamps on the response envelope. The
// gateway runs every job synchronously today so the flag flips to
// true at the same instant the entry's state changes; once a
// long-running execution lane lands the flag's pre-flip observation
// window will widen.
//
// NOTE(review): `Registry.Cancel` in this file moves the state to
// `JobStateDone`; an earlier wording here said CANCELLED -- confirm
// which state string callers should expect.
//
// Configuration is the round-trip copy of the inbound
// `configuration` body so `jobs.get` / `jobs.list` echo back the
// same fields the caller posted at `jobs.insert` time. Sync
// `jobs.query` calls (which do not go through `jobs.insert`) leave
// it nil; clients reading those entries see no `configuration`
// field, matching the upstream behavior.
type Job struct {
        Kind          string               `json:"kind,omitempty"`
        ID            string               `json:"id,omitempty"`
        JobReference  bqtypes.JobReference `json:"jobReference"`
        Status        Status               `json:"status"`
        Statistics    Statistics           `json:"statistics"`
        Configuration *JobConfiguration    `json:"configuration,omitempty"`
        UserEmail     string               `json:"user_email,omitempty"`
        // ParentJobID is the registry's internal link to a scripting
        // parent, kept so `JobDelete` can cascade by parent-id without
        // growing a separate scripting index. JSON tag is `-` so this
        // field itself never appears on the wire.
        //
        // NOTE(review): `Statistics.ParentJobID` (wire name
        // `parentJobId`) exists separately; confirm whether the two are
        // kept in sync by the code that creates child jobs.
        ParentJobID string       `json:"-"`
        Result      *QueryResult `json:"-"`
}

// Registry is a process-local jobs table keyed by jobId. Reads /
// writes are concurrency-safe via a single sync.RWMutex over an
// ordered slice + map index. We track insertion order on the side
// (the `order` slice) so `ListByProject` can hand back a deterministic
// reverse-chronological page without the caller having to sort an
// arbitrary `sync.Map` walk. The monotonic counter is bumped
// atomically so even within a single nanosecond two requests still
// see distinct ids.
//
// The map only holds successful or terminally-failed jobs today;
// the emulator does not yet maintain a pending queue (see the
// package-level doc for why DONE-on-arrival is fine).
//
// The lock guards the `jobs` map and the `order` slice. The *Job
// pointers handed out by `Get` / `ListByProject` are the stored
// pointers themselves, not copies.
type Registry struct {
        // counter feeds the trailing sequence number in `NewJobID`; it is
        // advanced atomically and is independent of `mu`.
        counter atomic.Uint64
        // mu guards jobs and order.
        mu sync.RWMutex
        // jobs indexes every registered job by its JobReference.JobID.
        jobs map[string]*Job
        // order is the insertion-ordered list of jobIds. `ListByProject`
        // iterates this in reverse to produce a newest-first page, which
        // matches what the BigQuery client libraries (and the upstream
        // `jobs.list` default sort) display. The slice grows on
        // Register / CompleteQuery and shrinks on Delete (linear scan;
        // fine for the per-process volumes the emulator handles, ~10s
        // of jobs in any test run).
        order []string
}

// NewRegistry builds an empty registry with its job index ready for
// writes. A gateway process normally owns exactly one; tests can
// create a private instance each so no state leaks through a global.
func NewRegistry() *Registry {
        reg := new(Registry)
        reg.jobs = make(map[string]*Job)
        return reg
}

// NewJobID mints an id shaped like `job_<unix_nanos>_<seq>`. The
// `job_` prefix follows the convention BigQuery and the official
// client libraries use for auto-generated ids (cf.
// `cloud.google.com/go/bigquery`'s `randomIDFn`). The sequence number
// comes from the registry's atomic counter, so two ids minted in the
// same nanosecond (possible on coarse-resolution clocks such as
// Windows under heavy concurrency) still differ.
func (r *Registry) NewJobID() string {
        seq := r.counter.Add(1)
        id := make([]byte, 0, 48)
        id = append(id, "job_"...)
        id = strconv.AppendInt(id, time.Now().UnixNano(), 10)
        id = append(id, '_')
        id = strconv.AppendUint(id, seq, 10)
        return string(id)
}

// CompleteQuery records an already-finished query job that has no
// cached result -- the happy path for sync `jobs.query`. It is a
// convenience wrapper over CompleteQueryWithResult with a nil result;
// the returned *Job is the same pointer stored in the registry, so a
// later `jobs.get` sees it verbatim.
//
// projectID flows from the URL path. location comes from the
// QueryRequest body and may be empty; the registry never invents one.
// totalBytesProcessed is whatever the engine reported scanning (0 is
// acceptable when the metric is not wired yet). start and end become
// the job's timestamps.
func (r *Registry) CompleteQuery(
        projectID, location string,
        totalBytesProcessed int64,
        start, end time.Time,
) *Job {
        var noResult *QueryResult
        return r.CompleteQueryWithResult(projectID, location, totalBytesProcessed, start, end, noResult)
}

// CompleteQueryWithResult mints a job id, builds a DONE query job
// around it, registers the job, and returns it. The optional result
// (schema + rows the engine produced) is attached to the job so
// `jobs.getQueryResults` can replay it without re-running the SQL;
// pass nil when there is nothing to cache, which is what
// `CompleteQuery` does.
//
// Both creationTime and startTime are stamped from start; endTime is
// stamped from end. totalBytesProcessed is rendered as a decimal
// string.
func (r *Registry) CompleteQueryWithResult(
        projectID, location string,
        totalBytesProcessed int64,
        start, end time.Time,
        result *QueryResult,
) *Job {
        id := r.NewJobID()

        ref := bqtypes.JobReference{
                ProjectID: projectID,
                JobID:     id,
                Location:  location,
        }
        stats := Statistics{
                CreationTime:        millisString(start),
                StartTime:           millisString(start),
                EndTime:             millisString(end),
                TotalBytesProcessed: strconv.FormatInt(totalBytesProcessed, 10),
        }

        job := &Job{
                Kind:         JobKind,
                ID:           projectID + ":" + id,
                JobReference: ref,
                Status:       Status{State: JobStateDone},
                Statistics:   stats,
                Result:       result,
        }
        r.Register(job)
        return job
}

// Register stores j under its JobReference.JobID and appends the id to
// the insertion-order list. A nil job or one with an empty id is
// ignored, and so is a job whose id is already registered (the
// existing pointer wins). `CompleteQueryWithResult` funnels through
// here so the sync-query and async-insert paths share one writer;
// tests may also call it directly to seed hand-built jobs, e.g. a
// non-DONE entry for the cancel/delete handlers to read back.
func (r *Registry) Register(j *Job) {
        if j == nil || j.JobReference.JobID == "" {
                return
        }
        key := j.JobReference.JobID

        r.mu.Lock()
        defer r.mu.Unlock()
        if _, taken := r.jobs[key]; !taken {
                r.jobs[key] = j
                r.order = append(r.order, key)
        }
}

// Get looks up jobID under the read lock and returns the stored job
// pointer together with true, or (nil, false) when the id is not
// registered. Used by `jobs.get` / `jobs.getQueryResults`.
func (r *Registry) Get(jobID string) (*Job, bool) {
        r.mu.RLock()
        found, present := r.jobs[jobID]
        r.mu.RUnlock()
        return found, present
}

// ListOptions captures the documented `jobs.list` query parameters
// the handler exposes today. Empty / zero-valued fields are treated
// as "no filter". PageToken is the opaque cursor `ListByProject`
// hands back; the handler does not need to interpret it.
type ListOptions struct {
        // MaxResults caps the page size; values <= 0 select the default
        // cap applied by `ListByProject`.
        MaxResults int
        // PageToken resumes a previous listing; empty starts at the newest job.
        PageToken string
        // ParentJobID, when non-empty, keeps only jobs whose internal
        // ParentJobID link equals it.
        ParentJobID     string
        MinCreationTime int64 // millis since epoch; 0 = unbounded
        MaxCreationTime int64 // millis since epoch; 0 = unbounded
        // StateFilter holds the caller's wire spellings; they are
        // normalised by `normalizeStateFilters` before matching.
        StateFilter []string
}

// ListByProject returns the page of jobs belonging to projectID that
// match the supplied options. Results are ordered newest-first
// (mirroring `bigquery.jobs.list`'s default sort) and pagination is
// cursor-based: the returned nextPageToken is opaque to the caller
// and feeds straight back into `ListOptions.PageToken` for the next
// page. When no more pages remain the token is empty.
//
// `MaxResults <= 0` means "the documented default cap" (50 today;
// upstream picks the same number when callers omit the field).
//
// An empty, malformed, or negative PageToken restarts the listing from
// the newest job; the method has no error return, so a bad cursor
// degrades to "first page" instead of failing.
func (r *Registry) ListByProject(projectID string, opts ListOptions) (
        jobs []*Job, nextPageToken string,
) {
        r.mu.RLock()
        defer r.mu.RUnlock()

        maxResults := opts.MaxResults
        if maxResults <= 0 {
                maxResults = defaultListMaxResults
        }
        stateFilters := normalizeStateFilters(opts.StateFilter)

        // The cursor is the decimal count of matching jobs the caller has
        // already consumed. Tokens come from clients, so anything that
        // does not parse to a non-negative count is treated as "start
        // over"; a negative count in particular would skip nothing yet
        // make the next token too small, causing overlapping pages.
        startIdx, err := strconv.Atoi(opts.PageToken)
        if err != nil || startIdx < 0 {
                startIdx = 0
        }

        // MaxResults is caller-supplied; never reserve more slots than
        // the registry could possibly return.
        jobs = make([]*Job, 0, min(maxResults, len(r.order)))
        skipped := 0
        // Walk newest-first by iterating `order` in reverse; this matches
        // `bigquery.jobs.list`'s default sort.
        for _, v := range slices.Backward(r.order) {
                j := r.jobs[v]
                if !jobMatchesProject(j, projectID, opts, stateFilters) {
                        continue
                }
                if skipped < startIdx {
                        skipped++
                        continue
                }
                if len(jobs) >= maxResults {
                        // At least one more match exists beyond this page, so
                        // hand back a cursor pointing just past what was served.
                        nextPageToken = strconv.Itoa(startIdx + len(jobs))
                        return jobs, nextPageToken
                }
                jobs = append(jobs, j)
        }
        return jobs, ""
}

// defaultListMaxResults is the page size `ListByProject` applies when
// the caller's `MaxResults` is zero or negative. Upstream's documented
// default is 50; matching it avoids surprises for clients that probe
// the emulator before passing an explicit cap.
const defaultListMaxResults = 50

// jobMatchesProject is the per-entry filter `ListByProject` runs
// against the iteration. Hoisted out so the page loop stays a
// straight cursor without nested ifs (cyclop / nestif caps).
//
// A job matches when it belongs to projectID and passes every filter
// that is set: the optional parent-job link, the optional state set,
// and the optional creation-time bounds (0 means unbounded).
//
// stateFilters follows the contract of normalizeStateFilters: nil
// means "no state filter"; a non-nil set -- even an empty one -- is
// enforced, so an empty set matches no job.
func jobMatchesProject(j *Job, projectID string, opts ListOptions, stateFilters map[string]bool) bool {
        if j.JobReference.ProjectID != projectID {
                return false
        }
        if opts.ParentJobID != "" && j.ParentJobID != opts.ParentJobID {
                return false
        }
        if stateFilters != nil && !stateFilters[j.Status.State] {
                return false
        }
        if opts.MinCreationTime == 0 && opts.MaxCreationTime == 0 {
                return true
        }
        // A missing or malformed creationTime parses as 0, which keeps
        // such a job out of any window with a lower bound.
        creation, _ := strconv.ParseInt(j.Statistics.CreationTime, 10, 64)
        if opts.MinCreationTime != 0 && creation < opts.MinCreationTime {
                return false
        }
        if opts.MaxCreationTime != 0 && creation > opts.MaxCreationTime {
                return false
        }
        return true
}

// normalizeStateFilters folds the caller-provided wire spellings
// (`pending` / `running` / `done`) into a set keyed by the canonical
// `Status.State` value the registry stores. Matching is
// case-insensitive and ignores surrounding whitespace.
//
// Unknown spellings are dropped (the upstream API documents the
// parameter values explicitly), but they still count as "a filter was
// requested": when the caller supplied at least one non-blank value
// and none of them is recognised, the result is an empty non-nil set,
// so a typo narrows the listing to nothing instead of silently
// broadening it to every state.
//
// Returns nil only for the no-filter case (no entries, or only blank
// ones) so the per-entry filter knows to skip the state check.
func normalizeStateFilters(in []string) map[string]bool {
        if len(in) == 0 {
                return nil
        }
        out := make(map[string]bool, len(in))
        requested := false
        for _, raw := range in {
                key := strings.ToLower(strings.TrimSpace(raw))
                if key == "" {
                        // Blank entries (e.g. an empty query parameter) carry no intent.
                        continue
                }
                requested = true
                if canon, ok := stateFilterAliases[key]; ok {
                        out[canon] = true
                }
        }
        if !requested {
                return nil
        }
        return out
}

// Cancel marks the named job as cancel-requested and returns the
// stored entry. A job that is not yet DONE is moved to DONE, and its
// EndTime is stamped with the current UTC time if none was recorded.
// A job that is already DONE keeps its state and statistics; only
// CancelRequested is set, so repeated calls are harmless.
//
// The bool is false (and the job nil) when jobID is unknown, letting
// the handler answer with a 404.
func (r *Registry) Cancel(jobID string) (*Job, bool) {
        r.mu.Lock()
        defer r.mu.Unlock()

        entry, found := r.jobs[jobID]
        if !found {
                return nil, false
        }
        entry.Status.CancelRequested = true
        if entry.Status.State == JobStateDone {
                return entry, true
        }
        entry.Status.State = JobStateDone
        if entry.Statistics.EndTime == "" {
                entry.Statistics.EndTime = millisString(time.Now().UTC())
        }
        return entry, true
}

// Delete drops jobID from the registry together with every job whose
// ParentJobID equals it, so deleting a script parent removes its
// children in the same call. Only direct children are removed; the
// cascade does not recurse. Returns false, touching nothing, when
// jobID is not registered.
func (r *Registry) Delete(jobID string) bool {
        r.mu.Lock()
        defer r.mu.Unlock()

        if _, known := r.jobs[jobID]; !known {
                return false
        }
        // Decide what goes before mutating anything, so the order slice
        // is never edited while it is being ranged over.
        doomed := []string{jobID}
        for _, id := range r.order {
                if id == jobID {
                        continue
                }
                if child, ok := r.jobs[id]; ok && child.ParentJobID == jobID {
                        doomed = append(doomed, id)
                }
        }
        for _, id := range doomed {
                r.removeLocked(id)
        }
        return true
}

// removeLocked deletes id from the `jobs` index and from the first
// position it occupies in `order`. The caller must already hold `mu`
// for writing. Locating and closing the gap is a linear scan plus a
// copy, which is fine for the per-process volumes the emulator
// handles; a linked list could replace it if registries ever grow
// into the thousands, but the book-keeping is not warranted today.
func (r *Registry) removeLocked(id string) {
        delete(r.jobs, id)

        pos := -1
        for i := range r.order {
                if r.order[i] == id {
                        pos = i
                        break
                }
        }
        if pos < 0 {
                return
        }
        // Shift the tail left by one and trim the now-duplicated last slot.
        copy(r.order[pos:], r.order[pos+1:])
        r.order = r.order[:len(r.order)-1]
}

// millisString renders t in BigQuery's wire timestamp format: whole
// milliseconds since the Unix epoch, as a base-10 string. It produces
// the `creationTime` / `startTime` / `endTime` values emitted in
// `Statistics`.
func millisString(t time.Time) string {
        ms := t.UnixMilli()
        return string(strconv.AppendInt(nil, ms, 10))
}

// FormatDryRunBytesProcessed renders an estimated byte count as the
// decimal string BigQuery REST emits for dry-run jobs. Estimates of
// zero or below are reported as "1": client libraries treat an empty
// or zero counter as missing, and upstream dry-run samples assert a
// positive value.
func FormatDryRunBytesProcessed(estimated int64) string {
        reported := estimated
        if reported < 1 {
                reported = 1
        }
        return strconv.FormatInt(reported, 10)
}

// ApplyDryRunStatistics writes dry-run statistics onto job: startTime
// and endTime from the supplied instants, and the formatted byte
// estimate into both statistics.totalBytesProcessed and a freshly
// allocated statistics.query.totalBytesProcessed (the nested envelope
// QueryJob reads). Any previous statistics.query value is replaced.
// A nil job is a no-op. The job's status is not touched here.
func ApplyDryRunStatistics(job *Job, estimated int64, start, end time.Time) {
        if job == nil {
                return
        }
        processed := FormatDryRunBytesProcessed(estimated)

        stats := &job.Statistics
        stats.StartTime = millisString(start)
        stats.EndTime = millisString(end)
        stats.TotalBytesProcessed = processed
        stats.Query = &bqtypes.JobStatistics2{TotalBytesProcessed: processed}
}

package load

import (
        "context"
        "errors"
        "fmt"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

// Result captures load-job statistics for jobs.insert responses.
// As filled in by execute in this file: InputFiles is the number of
// sources parsed, InputFileBytes is the summed size of their raw
// bytes, OutputRows is the count reported by the row insert, and
// OutputBytes is set to the same value as InputFileBytes.
type Result struct {
        InputFiles     int
        InputFileBytes int64
        OutputRows     int64
        OutputBytes    int64
}

// Execute runs a synchronous LOAD job against the engine catalog,
// reading its input from cfg.SourceURIs (no inline media is supplied).
// defaultProject is used when the destination table names no project.
func Execute(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationLoad,
        defaultProject string,
) (Result, error) {
        var noInline [][]byte
        return execute(ctx, catalog, cfg, defaultProject, noInline)
}

// ExecuteFromBytes runs a LOAD job whose single input is the uploaded
// media payload rather than cfg.SourceURIs. The payload is always
// handed on as a one-element list, even when media is empty.
func ExecuteFromBytes(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationLoad,
        defaultProject string, media []byte,
) (Result, error) {
        uploads := [][]byte{media}
        return execute(ctx, catalog, cfg, defaultProject, uploads)
}

// execute is the shared implementation behind Execute and
// ExecuteFromBytes. In order, it:
//
//  1. validates that a configuration, a destination table id, and at
//     least one source (URI or inline payload) are present;
//  2. picks the schema used for parsing: the one on cfg, or -- when
//     cfg has none and autodetect is off -- whatever
//     existingDestinationSchema reports for the destination;
//  3. parses every source into rows;
//  4. ensures the dataset exists, resolves the destination schema
//     (falling back to the parsed schema), and applies the write
//     disposition;
//  5. inserts the parsed rows.
//
// Sources are parsed before any catalog mutation, so a parse failure
// leaves the catalog untouched. The first error encountered is
// returned with a zero Result.
func execute(ctx context.Context, catalog enginepb.CatalogClient, cfg *jobs.JobConfigurationLoad,
        defaultProject string, inline [][]byte,
) (Result, error) {
        switch {
        case cfg == nil:
                return Result{}, errors.New("load configuration is required")
        case cfg.DestinationTable == nil || cfg.DestinationTable.TableID == "":
                return Result{}, errors.New("destinationTable.tableId is required")
        case len(cfg.SourceURIs) == 0 && len(inline) == 0:
                return Result{}, errors.New("sourceUris or upload media is required")
        }

        dest := cfg.DestinationTable
        projectID, datasetID, tableID := dest.ProjectID, dest.DatasetID, dest.TableID
        if projectID == "" {
                projectID = defaultProject
        }

        parseSchema := cfg.Schema
        schemaMissing := parseSchema == nil || len(parseSchema.Fields) == 0
        if schemaMissing && !cfg.Autodetect {
                parseSchema = existingDestinationSchema(ctx, catalog, projectID, datasetID, tableID)
        }

        parsed, totalBytes, inputFiles, err := parseLoadSources(ctx, cfg, inline, parseSchema)
        if err != nil {
                return Result{}, err
        }

        if dsErr := EnsureDataset(ctx, catalog, projectID, datasetID); dsErr != nil {
                return Result{}, dsErr
        }
        protoSchema, err := resolveDestinationSchema(ctx, catalog, cfg, projectID, datasetID, tableID, parsed.Schema)
        if err != nil {
                return Result{}, err
        }
        if protoSchema == nil {
                protoSchema = SchemaToProto(parsed.Schema)
        }
        if wdErr := applyWriteDisposition(ctx, catalog, cfg, projectID, datasetID, tableID, protoSchema); wdErr != nil {
                return Result{}, wdErr
        }

        applier := seed.NewCatalogApplier(catalog)
        target := seed.TableRef{ProjectID: projectID, DatasetID: datasetID, TableID: tableID}
        inserted, err := applier.InsertRows(ctx, target, protoSchema, parsed.Rows)
        if err != nil {
                return Result{}, err
        }

        stats := Result{
                InputFiles:     inputFiles,
                InputFileBytes: totalBytes,
                OutputRows:     int64(inserted),
                OutputBytes:    totalBytes,
        }
        return stats, nil
}

// parseLoadSources picks where the load input comes from: inline
// payloads win when any are supplied, otherwise cfg.SourceURIs are
// fetched and parsed. It returns the merged rows, the total number of
// raw bytes read, the number of sources parsed, and the first error.
func parseLoadSources(ctx context.Context, cfg *jobs.JobConfigurationLoad, inline [][]byte,
        parseSchema *bqtypes.TableSchema,
) (parsed ParsedRows, totalBytes int64, inputFiles int, err error) {
        if len(inline) == 0 {
                return parseURISources(ctx, cfg, parseSchema)
        }
        return parseInlineSources(cfg, inline, parseSchema)
}

// parseInlineSources parses each inline payload with cfg's source
// format, skip-leading-rows count, and autodetect flag, and merges the
// results: the first payload's parse result is the base and later
// payloads contribute their rows. It returns the merged rows, the
// summed payload size in bytes, and the number of payloads. The first
// parse error aborts the run with zero values.
func parseInlineSources(cfg *jobs.JobConfigurationLoad, inline [][]byte,
        parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
        var (
                merged   ParsedRows
                byteSize int64
        )
        for idx := range inline {
                payload := inline[idx]
                byteSize += int64(len(payload))
                rows, parseErr := ParseSource(cfg.SourceFormat, payload, parseSchema, cfg.SkipLeadingRows(), cfg.Autodetect)
                if parseErr != nil {
                        return ParsedRows{}, 0, 0, parseErr
                }
                merged = mergeParsedChunk(merged, rows, idx == 0)
        }
        return merged, byteSize, len(inline), nil
}

// parseURISources loads rows from cfg.SourceURIs. Two cases are handed
// to dedicated helpers: configurations with hive partitioning options,
// and Datastore backups. The latter is detected from the upper-cased,
// trimmed source format, or inferred from the URIs when no format is
// given, and receives a copy of cfg with the resolved format filled in.
//
// Otherwise the URIs are expanded, each one is fetched and parsed with
// the resolved format, and the chunks are merged (first chunk as base,
// later chunks contributing rows). It returns the merged rows, the
// total fetched bytes, and the number of expanded URIs. The first
// expand, fetch, or parse error aborts the run with zero values.
func parseURISources(ctx context.Context, cfg *jobs.JobConfigurationLoad,
        parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
        if cfg.HivePartitioningOptions != nil {
                return parseHiveURISources(ctx, cfg, parseSchema)
        }

        format := strings.ToUpper(strings.TrimSpace(cfg.SourceFormat))
        if format == "" {
                format = inferSourceFormatFromURIs(cfg.SourceURIs)
        }
        if format == sourceFormatDatastoreBackup {
                // Work on a copy so the caller's configuration is not mutated.
                backupCfg := *cfg
                backupCfg.SourceFormat = format
                return parseDatastoreBackupSources(ctx, &backupCfg, parseSchema)
        }

        expanded, expandErr := ExpandSourceURIs(ctx, cfg.SourceURIs)
        if expandErr != nil {
                return ParsedRows{}, 0, 0, expandErr
        }

        var (
                merged   ParsedRows
                byteSize int64
        )
        for idx := range expanded {
                payload, fetchErr := FetchSource(ctx, expanded[idx])
                if fetchErr != nil {
                        return ParsedRows{}, 0, 0, fetchErr
                }
                byteSize += int64(len(payload))
                rows, parseErr := ParseSource(format, payload, parseSchema, cfg.SkipLeadingRows(), cfg.Autodetect)
                if parseErr != nil {
                        return ParsedRows{}, 0, 0, parseErr
                }
                merged = mergeParsedChunk(merged, rows, idx == 0)
        }
        return merged, byteSize, len(expanded), nil
}

// inferSourceFormatFromURIs guesses the source format when the load
// configuration leaves it blank. If any URI ends in ".export_metadata"
// the load is treated as a Datastore backup; otherwise the empty
// string is returned and the format stays undetermined.
func inferSourceFormatFromURIs(uris []string) string {
        const metadataSuffix = ".export_metadata"
        inferred := ""
        for i := 0; i < len(uris) && inferred == ""; i++ {
                if strings.HasSuffix(uris[i], metadataSuffix) {
                        inferred = "DATASTORE_BACKUP"
                }
        }
        return inferred
}

// mergeParsedChunk folds one parsed source into the running result.
// The first chunk replaces the accumulator wholesale; every later
// chunk only has its rows appended, so the accumulator's other fields
// stay as the first chunk set them.
func mergeParsedChunk(acc, chunk ParsedRows, first bool) ParsedRows {
        if !first {
                acc.Rows = append(acc.Rows, chunk.Rows...)
                return acc
        }
        return chunk
}

// EnsureDestinationTable applies write-disposition semantics to the
// table identified by projectID / datasetID / tableID, using schema
// for any table registration that needs one. It wraps the reference
// and disposition in a minimal load configuration and delegates to
// applyWriteDisposition.
func EnsureDestinationTable(ctx context.Context, catalog enginepb.CatalogClient,
        projectID, datasetID, tableID, writeDisposition string, schema *enginepb.TableSchema,
) error {
        dest := bqtypes.TableReference{
                ProjectID: projectID,
                DatasetID: datasetID,
                TableID:   tableID,
        }
        loadCfg := jobs.JobConfigurationLoad{
                DestinationTable: &dest,
                WriteDisposition: writeDisposition,
        }
        return applyWriteDisposition(ctx, catalog, &loadCfg, projectID, datasetID, tableID, schema)
}

// EnsureDataset asks a seed catalog applier to ensure datasetID exists under
// projectID, passing "US" as the final argument (presumably the dataset
// location — confirm against seed.CatalogApplier). Only the error is returned.
func EnsureDataset(ctx context.Context, catalog enginepb.CatalogClient, projectID, datasetID string) error {
        _, err := seed.NewCatalogApplier(catalog).EnsureDataset(ctx, projectID, datasetID, "US")
        return err
}

// applyWriteDisposition brings the destination table into the state required
// by cfg.WriteDisposition before rows are inserted:
//
//   - WRITE_TRUNCATE: drop the table if it exists, then register it afresh.
//   - WRITE_EMPTY: fail if the table already exists, otherwise register it.
//   - anything else (including the empty default, WRITE_APPEND): register the
//     table only when it is missing, tolerating an AlreadyExists response.
//
// Existence is determined by tableExists, i.e. by whether DescribeTable
// succeeds for the table reference.
func applyWriteDisposition(ctx context.Context, catalog enginepb.CatalogClient,
        cfg *jobs.JobConfigurationLoad, projectID, datasetID, tableID string, schema *enginepb.TableSchema,
) error {
        disposition := cfg.WriteDisposition
        if disposition == "" {
                disposition = writeAppend
        }
        ref := &enginepb.TableRef{
                ProjectId: projectID,
                DatasetId: datasetID,
                TableId:   tableID,
        }
        // register issues the one RegisterTable call shared by every branch.
        register := func() error {
                _, err := catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{
                        Table:  ref,
                        Schema: schema,
                })
                return err
        }

        found := tableExists(ctx, catalog, ref)

        if disposition == "WRITE_TRUNCATE" {
                if found {
                        if _, err := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: ref}); err != nil {
                                return fmt.Errorf("WRITE_TRUNCATE drop table: %w", err)
                        }
                }
                return register()
        }
        if disposition == "WRITE_EMPTY" {
                if found {
                        return fmt.Errorf("destination table %s.%s.%s is not empty", projectID, datasetID, tableID)
                }
                return register()
        }

        // WRITE_APPEND and CREATE_IF_NEEDED semantics.
        if found {
                return nil
        }
        if err := register(); err != nil && status.Code(err) != codes.AlreadyExists {
                return err
        }
        return nil
}

// tableExists reports whether DescribeTable succeeds for ref. Any error, not
// just a "not found" response, is treated as the table being absent.
func tableExists(ctx context.Context, catalog enginepb.CatalogClient, ref *enginepb.TableRef) bool {
        if _, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref}); err != nil {
                return false
        }
        return true
}

// SchemaToProto converts a REST TableSchema into its engine proto form,
// converting every field (and its nested fields) with fieldToProto. A nil
// schema yields nil.
func SchemaToProto(s *bqtypes.TableSchema) *enginepb.TableSchema {
        if s == nil {
                return nil
        }
        converted := make([]*enginepb.FieldSchema, len(s.Fields))
        for idx := range s.Fields {
                converted[idx] = fieldToProto(s.Fields[idx])
        }
        return &enginepb.TableSchema{Fields: converted}
}

// fieldToProto copies name, type, mode and description of a REST field into
// an engine FieldSchema and recursively converts its nested fields.
func fieldToProto(f bqtypes.TableFieldSchema) *enginepb.FieldSchema {
        var children []*enginepb.FieldSchema
        for idx := range f.Fields {
                children = append(children, fieldToProto(f.Fields[idx]))
        }
        return &enginepb.FieldSchema{
                Name:        f.Name,
                Type:        f.Type,
                Mode:        f.Mode,
                Description: f.Description,
                Fields:      children,
        }
}

// FormatStatistics renders the counters of a Result as the decimal strings
// carried by jobs.LoadStatistics. BadRecords is always reported as "0".
func FormatStatistics(r Result) *jobs.LoadStatistics {
        decimal := func(v int64) string { return strconv.FormatInt(v, 10) }

        stats := new(jobs.LoadStatistics)
        stats.InputFiles = strconv.Itoa(r.InputFiles)
        stats.InputFileBytes = decimal(r.InputFileBytes)
        stats.OutputRows = decimal(r.OutputRows)
        stats.OutputBytes = decimal(r.OutputBytes)
        stats.BadRecords = "0"
        return stats
}

// Package load implements the data plane for BigQuery LOAD jobs:
// fetch source bytes, parse CSV/JSON, and bulk-insert into the engine catalog.
package load

import (
        "context"
        "errors"
        "fmt"
        "io"
        "net/http"
        "net/url"
        "os"
        "path/filepath"
        "strings"
)

// FetchSource reads all bytes for a load-job source URI. The URI is
// dispatched on its form:
//
//   - gs://   fetched through fetchGCS
//   - s3://   fetched through fetchS3
//   - file:// the remainder after the prefix is read from the local filesystem
//   - a bare absolute path is read from the local filesystem
//
// Anything else is rejected with an "unsupported sourceUri scheme" error.
func FetchSource(ctx context.Context, uri string) ([]byte, error) {
        if strings.HasPrefix(uri, "gs://") {
                return fetchGCS(ctx, uri)
        }
        if strings.HasPrefix(uri, "s3://") {
                return fetchS3(ctx, uri)
        }
        if strings.HasPrefix(uri, "file://") {
                localPath := strings.TrimPrefix(uri, "file://")
                return os.ReadFile(localPath) //nolint:gosec // LOAD jobs intentionally read caller file:// URIs
        }
        if !filepath.IsAbs(uri) {
                return nil, fmt.Errorf("unsupported sourceUri scheme: %q", uri)
        }
        return os.ReadFile(uri) //nolint:gosec // absolute paths for local load samples
}

// fetchS3 downloads the object named by an s3://bucket/key URI with a plain
// HTTP GET against the endpoint configured in S3_ENDPOINT (surrounding
// whitespace and trailing slashes are stripped). It fails when S3_ENDPOINT is
// unset, when the URI lacks a bucket or key, or when the response status is
// not 200; in the latter case up to 512 bytes of the body are included in the
// error.
func fetchS3(ctx context.Context, s3URI string) ([]byte, error) {
        endpoint := strings.TrimSpace(os.Getenv("S3_ENDPOINT"))
        endpoint = strings.TrimRight(endpoint, "/")
        if endpoint == "" {
                return nil, errors.New("s3:// load sources require S3_ENDPOINT (dev-only); use gs:// or file:// instead")
        }

        // The bucket is everything up to the first slash; both parts must be non-empty.
        bucket, key, found := strings.Cut(strings.TrimPrefix(s3URI, "s3://"), "/")
        if !found || bucket == "" || key == "" {
                return nil, fmt.Errorf("invalid s3:// uri: %q", s3URI)
        }
        mediaURL, err := s3MediaURL(endpoint, bucket, key)
        if err != nil {
                return nil, err
        }

        //nolint:gosec // G704: host/scheme fixed to S3_ENDPOINT; object path from load URI is intentional
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, mediaURL, nil)
        if err != nil {
                return nil, err
        }
        //nolint:gosec // G704: dev-only fetch against operator-configured S3_ENDPOINT
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
                return nil, fmt.Errorf("fetch %s: %w", s3URI, err)
        }
        defer func() { _ = resp.Body.Close() }()

        if resp.StatusCode == http.StatusOK {
                payload, readErr := io.ReadAll(resp.Body)
                if readErr != nil {
                        return nil, fmt.Errorf("read %s: %w", s3URI, readErr)
                }
                return payload, nil
        }
        // Best-effort: attach a short excerpt of the error body.
        snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
        return nil, fmt.Errorf("fetch %s: HTTP %d: %s", s3URI, resp.StatusCode, strings.TrimSpace(string(snippet)))
}

// s3MediaURL builds the GET URL for bucket/key beneath endpoint. Scheme and
// host are taken from endpoint only; bucket and key are appended to its path.
// An endpoint that does not parse, or that lacks a scheme or host, is an error.
func s3MediaURL(endpoint, bucket, key string) (string, error) {
        parsed, parseErr := url.Parse(endpoint)
        switch {
        case parseErr != nil:
                return "", fmt.Errorf("invalid S3_ENDPOINT %q: %w", endpoint, parseErr)
        case parsed.Scheme == "", parsed.Host == "":
                return "", fmt.Errorf("invalid S3_ENDPOINT %q: scheme and host required", endpoint)
        }
        target := parsed.JoinPath(bucket, key)
        return target.String(), nil
}

// fetchGCS downloads the object named by a gs://bucket/object URI through
// the GCS JSON API media endpoint (".../o/<object>?alt=media") on the origin
// returned by storageEmulatorBase. Bucket and object are path-escaped. A URI
// without a bucket or object, or a non-200 response, is an error; for the
// latter up to 512 bytes of the body are included in the message.
func fetchGCS(ctx context.Context, gsURI string) ([]byte, error) {
        // The bucket is everything up to the first slash; both parts must be non-empty.
        bucket, object, found := strings.Cut(strings.TrimPrefix(gsURI, "gs://"), "/")
        if !found || bucket == "" || object == "" {
                return nil, fmt.Errorf("invalid gs:// uri: %q", gsURI)
        }

        mediaURL := fmt.Sprintf("%s/storage/v1/b/%s/o/%s?alt=media",
                storageEmulatorBase(), url.PathEscape(bucket), url.PathEscape(object))

        req, err := http.NewRequestWithContext(ctx, http.MethodGet, mediaURL, nil)
        if err != nil {
                return nil, err
        }
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
                return nil, fmt.Errorf("fetch %s: %w", gsURI, err)
        }
        defer func() { _ = resp.Body.Close() }()

        if resp.StatusCode == http.StatusOK {
                payload, readErr := io.ReadAll(resp.Body)
                if readErr != nil {
                        return nil, fmt.Errorf("read %s: %w", gsURI, readErr)
                }
                return payload, nil
        }
        // Best-effort: attach a short excerpt of the error body.
        snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
        return nil, fmt.Errorf("fetch %s: HTTP %d: %s", gsURI, resp.StatusCode, strings.TrimSpace(string(snippet)))
}

// storageEmulatorBase returns the HTTP origin ("http://host:port", never with
// a trailing slash) used for GCS JSON API requests.
//
// STORAGE_EMULATOR_HOST is used when set: a leading "http://", "https://" or
// "//" and any trailing slashes are removed, and the origin is always rebuilt
// with the http scheme. If the remaining host already contains a ':' it is
// used as-is; otherwise the port is taken from FAKE_GCS_PORT (default 4443).
// When no host is left, 127.0.0.1 is used.
//
// Based on the normalization in scripts/preflight_node_samples_gcs.sh, with
// trailing slashes additionally removed so callers can append "/storage/...".
func storageEmulatorBase() string {
        host := strings.TrimSpace(os.Getenv("STORAGE_EMULATOR_HOST"))
        host = strings.TrimPrefix(host, "http://")
        host = strings.TrimPrefix(host, "https://")
        host = strings.TrimPrefix(host, "//")
        // Without this, "localhost/" would become "http://localhost/:4443" and
        // "localhost:4443/" would produce a doubled slash in request URLs.
        host = strings.TrimRight(host, "/")

        if strings.Contains(host, ":") {
                return "http://" + host
        }
        if host == "" {
                host = "127.0.0.1"
        }
        port := os.Getenv("FAKE_GCS_PORT")
        if port == "" {
                port = "4443"
        }
        return "http://" + host + ":" + port
}

package load

import (
        "context"
        "encoding/json"
        "fmt"
        "io"
        "net/http"
        "net/url"
        "strings"
)

// ExpandSourceURIs returns uris with every entry containing a '*' replaced
// by the objects it matches (see expandWildcardURI). Entries without a '*'
// are passed through unchanged and order is preserved. The first expansion
// error aborts the whole call.
func ExpandSourceURIs(ctx context.Context, uris []string) ([]string, error) {
        var resolved []string
        for _, candidate := range uris {
                if strings.Contains(candidate, "*") {
                        matches, err := expandWildcardURI(ctx, candidate)
                        if err != nil {
                                return nil, err
                        }
                        resolved = append(resolved, matches...)
                } else {
                        resolved = append(resolved, candidate)
                }
        }
        return resolved, nil
}

// expandWildcardURI resolves a single gs:// URI whose object path contains a
// '*'. The path is split at the first '*' into a prefix and a suffix; objects
// are listed by prefix and kept when matchesGCSWildcard accepts them. A URI
// whose object path has no '*' is returned as-is, and an expansion that
// matches nothing is an error.
func expandWildcardURI(ctx context.Context, gsURI string) ([]string, error) {
        bucket, pattern, err := splitGSURI(gsURI)
        if err != nil {
                return nil, err
        }
        prefix, suffix, hasStar := strings.Cut(pattern, "*")
        if !hasStar {
                return []string{gsURI}, nil
        }

        listed, err := ListGCSObjects(ctx, bucket, prefix)
        if err != nil {
                return nil, err
        }
        var matched []string
        for i := range listed {
                if matchesGCSWildcard(listed[i], prefix, suffix) {
                        matched = append(matched, fmt.Sprintf("gs://%s/%s", bucket, listed[i]))
                }
        }
        if len(matched) > 0 {
                return matched, nil
        }
        return nil, fmt.Errorf("no objects matched sourceUri %q", gsURI)
}

// matchesGCSWildcard reports whether object name matches "prefix*suffix".
// Names ending in "/" never match, and the part covered by the '*' must be
// at least one character long.
func matchesGCSWildcard(name, prefix, suffix string) bool {
        if !strings.HasPrefix(name, prefix) || strings.HasSuffix(name, "/") {
                return false
        }
        middle := strings.TrimPrefix(name, prefix)
        if !strings.HasSuffix(middle, suffix) {
                return false
        }
        // Something must remain between prefix and suffix for the '*'.
        return len(middle) > len(suffix)
}

// ListGCSObjects returns the names of all objects in bucket whose name starts
// with prefix, using the GCS JSON API list endpoint on the origin returned by
// storageEmulatorBase. Pages of up to 1000 items are requested and followed
// through nextPageToken until exhausted. Items with an empty name are skipped.
// A transport error, a non-200 status or an undecodable body aborts the call.
func ListGCSObjects(ctx context.Context, bucket, prefix string) ([]string, error) {
        // listPage is the subset of the list response this function reads.
        type listPage struct {
                Items []struct {
                        Name string `json:"name"`
                } `json:"items"`
                NextPageToken string `json:"nextPageToken"`
        }

        origin := storageEmulatorBase()
        var collected []string
        for token := ""; ; {
                params := url.Values{}
                params.Set("prefix", prefix)
                params.Set("maxResults", "1000")
                if token != "" {
                        params.Set("pageToken", token)
                }
                listURL := fmt.Sprintf("%s/storage/v1/b/%s/o?%s",
                        origin, url.PathEscape(bucket), params.Encode())

                req, err := http.NewRequestWithContext(ctx, http.MethodGet, listURL, nil)
                if err != nil {
                        return nil, err
                }
                resp, err := http.DefaultClient.Do(req)
                if err != nil {
                        return nil, fmt.Errorf("list gs://%s/%s: %w", bucket, prefix, err)
                }
                // Read and close eagerly instead of deferring: this runs once per page.
                raw, readErr := io.ReadAll(resp.Body)
                _ = resp.Body.Close()
                if readErr != nil {
                        return nil, readErr
                }
                if resp.StatusCode != http.StatusOK {
                        return nil, fmt.Errorf("list gs://%s/%s: HTTP %d: %s",
                                bucket, prefix, resp.StatusCode, strings.TrimSpace(string(raw)))
                }

                var page listPage
                if err := json.Unmarshal(raw, &page); err != nil {
                        return nil, fmt.Errorf("decode list response: %w", err)
                }
                for i := range page.Items {
                        if objectName := page.Items[i].Name; objectName != "" {
                                collected = append(collected, objectName)
                        }
                }
                if token = page.NextPageToken; token == "" {
                        return collected, nil
                }
        }
}

// splitGSURI splits "gs://bucket/object/path" into its bucket and object
// path. The URI must start with "gs://" and contain a '/' after a non-empty
// bucket; the object part may be empty (e.g. "gs://bucket/").
func splitGSURI(gsURI string) (bucket, object string, err error) {
        const scheme = "gs://"
        if strings.HasPrefix(gsURI, scheme) {
                name, path, found := strings.Cut(gsURI[len(scheme):], "/")
                if found && name != "" {
                        return name, path, nil
                }
        }
        return "", "", fmt.Errorf("invalid gs:// uri: %q", gsURI)
}

// ObjectPathFromURI returns the object path of a gs:// URI, i.e. everything
// after "gs://<bucket>/". It fails when the URI is not a valid gs:// URI.
func ObjectPathFromURI(gsURI string) (string, error) {
        _, objectPath, err := splitGSURI(gsURI)
        if err != nil {
                return "", err
        }
        return objectPath, nil
}

// BucketFromURI returns the bucket name of a gs:// URI. It fails when the
// URI is not a valid gs:// URI.
func BucketFromURI(gsURI string) (string, error) {
        bucketName, _, err := splitGSURI(gsURI)
        if err != nil {
                return "", err
        }
        return bucketName, nil
}

package load

import (
        "context"
        "errors"
        "fmt"
        "sort"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
)

// Hive partitioning mode names, matched against the trimmed, upper-cased
// HivePartitioningOptions.Mode, plus the column type assigned to partition
// fields that are derived from path values rather than declared explicitly.
const (
        hiveModeCustom                = "CUSTOM"
        hiveModeAuto                  = "AUTO"
        hiveModeStrings               = "STRINGS"
        defaultHivePartitionFieldType = "STRING"
)

// hivePartitionField describes one partition column added to the schema.
type hivePartitionField struct {
        // Name is the partition key, used as the column name.
        Name string
        // Type is the column type: taken from a "{name:type}" placeholder in
        // CUSTOM mode, or defaultHivePartitionFieldType when derived from values.
        Type string
}

// parseHiveURISources adapts a load-job configuration to the external-table
// parser: it copies the source URIs, format, autodetect flag and hive
// partitioning options into an ExternalDataConfiguration and runs
// parseExternalGCS with the job's skip-leading-rows setting.
func parseHiveURISources(ctx context.Context, cfg *jobs.JobConfigurationLoad,
        parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
        external := bqtypes.ExternalDataConfiguration{
                SourceURIs:              cfg.SourceURIs,
                SourceFormat:            cfg.SourceFormat,
                Schema:                  parseSchema,
                Autodetect:              cfg.Autodetect,
                HivePartitioningOptions: cfg.HivePartitioningOptions,
        }
        return parseExternalGCS(ctx, &external, parseSchema, cfg.SkipLeadingRows())
}

// ParseExternalGCS fetches GCS objects (including wildcards) and applies hive
// partition columns when configured. Shared by load jobs and external tables.
//
// It is the exported entry point for parseExternalGCS and forwards its
// arguments unchanged. The results are, in order: the merged parsed rows, the
// total number of source bytes fetched, the number of source URIs after
// wildcard expansion, and the first error encountered.
func ParseExternalGCS(
        ctx context.Context,
        cfg *bqtypes.ExternalDataConfiguration,
        schema *bqtypes.TableSchema,
        skipLeading int,
) (ParsedRows, int64, int, error) {
        return parseExternalGCS(ctx, cfg, schema, skipLeading)
}

// parseExternalGCS expands cfg.SourceURIs, then fetches and parses every
// resulting URI in order, merging the chunks with mergeParsedChunk (the first
// chunk supplies the schema).
//
// When hive partitioning is configured, the partition values extracted from
// each object's URI are written into that object's rows. Partition columns
// come from resolveHivePartitionFields when it returns any; otherwise they
// are derived from the first object that yields partition values. They are
// merged into the resulting schema at the end.
//
// It returns the merged rows, the total bytes fetched, the number of
// expanded URIs, and the first error encountered.
func parseExternalGCS(
        ctx context.Context,
        cfg *bqtypes.ExternalDataConfiguration,
        schema *bqtypes.TableSchema,
        skipLeading int,
) (ParsedRows, int64, int, error) {
        uris, err := ExpandSourceURIs(ctx, cfg.SourceURIs)
        if err != nil {
                return ParsedRows{}, 0, 0, err
        }

        hive := cfg.HivePartitioningOptions
        var partitionFields []hivePartitionField
        if hive != nil {
                if partitionFields, err = resolveHivePartitionFields(hive); err != nil {
                        return ParsedRows{}, 0, 0, err
                }
        }

        var (
                merged    ParsedRows
                byteCount int64
        )
        for idx, uri := range uris {
                payload, fetchErr := FetchSource(ctx, uri)
                if fetchErr != nil {
                        return ParsedRows{}, 0, 0, fetchErr
                }
                byteCount += int64(len(payload))

                chunk, parseErr := ParseSource(cfg.SourceFormat, payload, schema, skipLeading, cfg.Autodetect)
                if parseErr != nil {
                        return ParsedRows{}, 0, 0, parseErr
                }

                if hive != nil {
                        values, partErr := extractHivePartitions(uri, hive)
                        if partErr != nil {
                                return ParsedRows{}, 0, 0, partErr
                        }
                        applyHivePartitions(chunk.Rows, values)
                        // Fall back to the keys seen in the path when none were declared.
                        if len(partitionFields) == 0 && len(values) > 0 {
                                partitionFields = partitionFieldsFromValues(values, hive)
                        }
                }
                merged = mergeParsedChunk(merged, chunk, idx == 0)
        }

        if hive != nil && len(partitionFields) > 0 {
                merged.Schema = mergeHiveSchema(merged.Schema, partitionFields)
        }
        return merged, byteCount, len(uris), nil
}

// applyHivePartitions writes every partition key/value pair into each row,
// overwriting any existing entry with the same key. Rows are modified in place.
func applyHivePartitions(rows []map[string]any, partitionValues map[string]string) {
        if len(partitionValues) == 0 {
                return
        }
        for i := range rows {
                target := rows[i]
                for key := range partitionValues {
                        target[key] = partitionValues[key]
                }
        }
}

// mergeHiveSchema returns a new schema made of the data schema's fields
// followed by the partition fields whose names are not already present in
// the data schema. With no partition fields the data schema is returned
// unchanged; a nil data schema is treated as having no fields.
func mergeHiveSchema(dataSchema *bqtypes.TableSchema, partitionFields []hivePartitionField) *bqtypes.TableSchema {
        if len(partitionFields) == 0 {
                return dataSchema
        }

        merged := &bqtypes.TableSchema{}
        taken := map[string]struct{}{}
        if dataSchema != nil {
                merged.Fields = append(merged.Fields, dataSchema.Fields...)
                for i := range dataSchema.Fields {
                        taken[dataSchema.Fields[i].Name] = struct{}{}
                }
        }
        for _, pf := range partitionFields {
                if _, clash := taken[pf.Name]; !clash {
                        merged.Fields = append(merged.Fields, bqtypes.TableFieldSchema{
                                Name: pf.Name,
                                Type: pf.Type,
                        })
                }
        }
        return merged
}

// resolveHivePartitionFields validates the hive options and returns the
// partition fields that are known before any object is read:
//
//   - CUSTOM: the fields declared in the sourceUriPrefix template.
//   - AUTO / STRINGS: none (nil), but sourceUriPrefix must be non-blank.
//
// Nil options yield (nil, nil); any other mode is an error. The mode is
// compared after trimming and upper-casing.
func resolveHivePartitionFields(opts *bqtypes.HivePartitioningOptions) ([]hivePartitionField, error) {
        if opts == nil {
                return nil, nil
        }
        switch strings.ToUpper(strings.TrimSpace(opts.Mode)) {
        case hiveModeAuto, hiveModeStrings:
                if strings.TrimSpace(opts.SourceURIPrefix) != "" {
                        return nil, nil
                }
                return nil, errors.New("hive AUTO/STRINGS mode requires sourceUriPrefix")
        case hiveModeCustom:
                _, _, declared, err := parseHiveCustomPrefix(opts.SourceURIPrefix)
                return declared, err
        }
        return nil, fmt.Errorf("unsupported hive partitioning mode %q", opts.Mode)
}

// extractHivePartitions returns the partition key/value pairs encoded in
// objectURI, dispatching on the trimmed, upper-cased mode: CUSTOM uses
// extractCustomPartitions, AUTO and STRINGS use extractAutoPartitions. Nil
// options yield (nil, nil); any other mode is an error.
func extractHivePartitions(objectURI string, opts *bqtypes.HivePartitioningOptions) (map[string]string, error) {
        if opts == nil {
                return nil, nil
        }
        mode := strings.ToUpper(strings.TrimSpace(opts.Mode))
        if mode == hiveModeCustom {
                return extractCustomPartitions(objectURI, opts.SourceURIPrefix)
        }
        if mode == hiveModeAuto || mode == hiveModeStrings {
                return extractAutoPartitions(objectURI, opts.SourceURIPrefix)
        }
        return nil, fmt.Errorf("unsupported hive partitioning mode %q", opts.Mode)
}

// parseHiveCustomPrefix parses a CUSTOM-mode sourceUriPrefix template such as
// "gs://bucket/tables/{dt:DATE}/{country:STRING}".
//
// It returns the bucket, the literal object-path prefix (the template's path
// with every "{name:type}" placeholder, and one '/' directly following it,
// removed), and the declared partition fields in template order. The template
// must start with "gs://" and contain a '/' after a non-empty bucket; every
// placeholder must be closed and have a non-empty name and type.
func parseHiveCustomPrefix(template string) (bucket, pathPrefix string, fields []hivePartitionField, err error) {
        if !strings.HasPrefix(template, "gs://") {
                return "", "", nil, errors.New("sourceUriPrefix must be a gs:// URI")
        }
        bucketName, pathTemplate, found := strings.Cut(strings.TrimPrefix(template, "gs://"), "/")
        if !found || bucketName == "" {
                return "", "", nil, errors.New("invalid sourceUriPrefix")
        }

        var literal strings.Builder
        pos := 0
        for pos < len(pathTemplate) {
                if pathTemplate[pos] != '{' {
                        literal.WriteByte(pathTemplate[pos])
                        pos++
                        continue
                }
                // end is the offset of the closing brace relative to pos.
                end := strings.Index(pathTemplate[pos:], "}")
                if end < 0 {
                        return "", "", nil, errors.New("unclosed { in sourceUriPrefix")
                }
                inner := pathTemplate[pos+1 : pos+end]
                fieldName, fieldType, hasColon := strings.Cut(inner, ":")
                if !hasColon || fieldName == "" || fieldType == "" {
                        return "", "", nil, fmt.Errorf("invalid hive field %q in sourceUriPrefix", inner)
                }
                fields = append(fields, hivePartitionField{Name: fieldName, Type: fieldType})

                pos += end + 1
                // Swallow the separator that follows a placeholder.
                if pos < len(pathTemplate) && pathTemplate[pos] == '/' {
                        pos++
                }
        }
        return bucketName, literal.String(), fields, nil
}

// extractCustomPartitions maps the leading path segments of objectURI onto
// the fields declared in a CUSTOM sourceUriPrefix template.
//
// The object must live in the template's bucket and under its literal path
// prefix, and must have at least one directory segment per declared field
// before the file name. Each segment is either "key=value" (the key must
// equal the field name) or a bare value, which is used as-is.
func extractCustomPartitions(objectURI, sourceURIPrefix string) (map[string]string, error) {
        bucket, pathPrefix, fields, err := parseHiveCustomPrefix(sourceURIPrefix)
        if err != nil {
                return nil, err
        }
        if len(fields) == 0 {
                return nil, errors.New("CUSTOM hive mode requires partition fields in sourceUriPrefix")
        }
        objPath, err := ObjectPathFromURI(objectURI)
        if err != nil {
                return nil, err
        }
        objBucket, err := BucketFromURI(objectURI)
        if err != nil {
                return nil, err
        }
        switch {
        case objBucket != bucket:
                return nil, fmt.Errorf("object bucket %q does not match sourceUriPrefix bucket %q", objBucket, bucket)
        case !strings.HasPrefix(objPath, pathPrefix):
                return nil, fmt.Errorf("object %q does not match hive prefix %q", objectURI, sourceURIPrefix)
        }

        // The final segment is the file name, so one extra segment is required.
        segments := strings.Split(objPath[len(pathPrefix):], "/")
        if len(segments) <= len(fields) {
                return nil, fmt.Errorf("object %q has too few path segments for hive layout", objectURI)
        }

        values := make(map[string]string, len(fields))
        for idx := range fields {
                want := fields[idx].Name
                segment := segments[idx]
                key, value, hasEq := strings.Cut(segment, "=")
                if !hasEq {
                        values[want] = segment
                        continue
                }
                if key != want {
                        return nil, fmt.Errorf("partition segment %q, want key %q", segment, want)
                }
                values[want] = value
        }
        return values, nil
}

// extractAutoPartitions reads "key=value" directory segments found between
// the sourceUriPrefix path and the file name of objectURI.
//
// Only object paths are compared (the bucket is not checked here). The
// object must lie under the prefix path, have at least one directory segment
// before the file name, and every such segment must contain '='.
func extractAutoPartitions(objectURI, sourceURIPrefix string) (map[string]string, error) {
        root, err := ObjectPathFromURI(sourceURIPrefix)
        if err != nil {
                return nil, err
        }
        // Normalize to exactly one trailing slash so the prefix ends on a segment boundary.
        root = strings.TrimSuffix(root, "/") + "/"

        objPath, err := ObjectPathFromURI(objectURI)
        if err != nil {
                return nil, err
        }
        if !strings.HasPrefix(objPath, root) {
                return nil, fmt.Errorf("object %q does not match hive prefix %q", objectURI, sourceURIPrefix)
        }

        segments := strings.Split(objPath[len(root):], "/")
        dirs := segments[:len(segments)-1] // drop the file name
        if len(dirs) == 0 {
                return nil, fmt.Errorf("object %q has no partition segments", objectURI)
        }
        values := make(map[string]string, len(dirs))
        for i := range dirs {
                key, value, hasEq := strings.Cut(dirs[i], "=")
                if !hasEq {
                        return nil, fmt.Errorf("partition segment %q is not key=value", dirs[i])
                }
                values[key] = value
        }
        return values, nil
}

// partitionFieldsFromValues derives partition columns from the key/value
// pairs extracted from an object path. Every column gets
// defaultHivePartitionFieldType (no type inference is done for any mode).
//
// Column order follows opts.Fields when it is non-empty; names listed there
// but absent from values are skipped. Otherwise the keys of values are used
// in sorted order: a map carries no path order, and ranging over it directly
// would make the resulting schema's column order vary between runs.
//
// It returns nil when values is empty. opts must not be nil.
func partitionFieldsFromValues(
        values map[string]string,
        opts *bqtypes.HivePartitioningOptions,
) []hivePartitionField {
        if len(values) == 0 {
                return nil
        }
        order := opts.Fields
        if len(order) == 0 {
                order = make([]string, 0, len(values))
                for name := range values {
                        order = append(order, name)
                }
                sort.Strings(order)
        }
        out := make([]hivePartitionField, 0, len(order))
        for _, name := range order {
                if _, ok := values[name]; !ok {
                        continue
                }
                out = append(out, hivePartitionField{Name: name, Type: defaultHivePartitionFieldType})
        }
        return out
}

package load

import "math"

// uint64ToSignedInt64 converts v to int64, saturating at math.MaxInt64 for
// values that do not fit instead of wrapping to a negative number.
func uint64ToSignedInt64(v uint64) int64 {
        const limit = uint64(math.MaxInt64)
        if v <= limit {
                return int64(v)
        }
        return math.MaxInt64
}

// datastorePropMarker returns the byte sequence 0x1a, len(prop), prop...
// for a property name of 1 to 255 bytes, and nil for any other length.
// NOTE(review): 0x1a followed by a one-byte length looks like a protobuf
// length-delimited tag for field 3 — confirm against the backup format.
func datastorePropMarker(prop string) []byte {
        size := len(prop)
        if size < 1 || size > 255 {
                return nil
        }
        marker := make([]byte, 0, size+2)
        marker = append(marker, 0x1a, byte(size)) //nolint:gosec // size is range-checked to [1,255] above
        return append(marker, prop...)
}

package load

import (
        "bytes"
        "encoding/csv"
        "encoding/json"
        "errors"
        "fmt"
        "strconv"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// Schema field type names (including alias pairs such as INTEGER/INT64),
// field mode names, and the write disposition that applyWriteDisposition
// substitutes when a load configuration leaves it empty.
const (
        fieldTypeString   = "STRING"
        fieldTypeInteger  = "INTEGER"
        fieldTypeInt64    = "INT64"
        fieldTypeFloat    = "FLOAT"
        fieldTypeFloat64  = "FLOAT64"
        fieldTypeBoolean  = "BOOLEAN"
        fieldTypeBool     = "BOOL"
        fieldTypeRecord   = "RECORD"
        fieldModeRequired = "REQUIRED"
        fieldModeRepeated = "REPEATED"
        writeAppend       = "WRITE_APPEND"
)

// ParsedRows is the in-memory row batch produced by a format parser.
type ParsedRows struct {
        // Schema is the schema the rows were parsed against: either the one
        // supplied by the caller or one produced by the parser. It may be nil.
        Schema *bqtypes.TableSchema
        // Rows holds one map per record, keyed by field name.
        Rows []map[string]any
}

// ParseSource decodes load bytes according to sourceFormat. The format name
// is trimmed and compared case-insensitively; an empty format means CSV.
// skipLeading is only used by the CSV parser, and the Datastore backup parser
// ignores autodetect. Unknown formats are rejected with an error.
func ParseSource(format string, data []byte, schema *bqtypes.TableSchema,
        skipLeading int, autodetect bool,
) (ParsedRows, error) {
        normalized := strings.ToUpper(strings.TrimSpace(format))
        switch normalized {
        case "CSV", "":
                return parseCSV(data, schema, skipLeading, autodetect)
        case "NEWLINE_DELIMITED_JSON":
                return parseNDJSON(data, schema, autodetect)
        case "PARQUET":
                return parseParquet(data, schema, autodetect)
        case "AVRO":
                return parseAvro(data, schema, autodetect)
        case "ORC":
                return parseORC(data, schema, autodetect)
        case sourceFormatDatastoreBackup:
                return parseDatastoreEntityBytes(data, schema)
        }
        return ParsedRows{}, fmt.Errorf("unsupported sourceFormat %q", format)
}

// parseCSV decodes CSV bytes into rows keyed by field name.
//
// The first skipLeading records are not treated as data; if nothing remains
// the result has the given schema and no rows. When no schema (or an empty
// one) is supplied:
//
//   - without autodetect the call fails;
//   - with autodetect and skipLeading > 0, the record just before the data is
//     taken as the header and column types are inferred from the data;
//   - with autodetect and no skipped records, columns are named
//     string_field_N and typed STRING, sized by the first data record.
//
// Cells are converted with coerceCSVCell according to the field type; fields
// beyond the end of a record are set to nil.
func parseCSV(data []byte, schema *bqtypes.TableSchema, skipLeading int, autodetect bool) (ParsedRows, error) {
        reader := csv.NewReader(bytes.NewReader(data))
        reader.TrimLeadingSpace = true
        records, err := reader.ReadAll()
        if err != nil {
                return ParsedRows{}, fmt.Errorf("parse CSV: %w", err)
        }
        if skipLeading >= len(records) {
                return ParsedRows{Schema: schema}, nil
        }
        // body is non-empty from here on.
        body := records[skipLeading:]

        if schema == nil || len(schema.Fields) == 0 {
                switch {
                case !autodetect:
                        return ParsedRows{}, errors.New("load job requires schema or autodetect=true for CSV")
                case skipLeading > 0:
                        schema = inferSchemaFromCSVHeader(records[skipLeading-1], body)
                default:
                        generated := make([]bqtypes.TableFieldSchema, len(body[0]))
                        for col := range generated {
                                generated[col] = bqtypes.TableFieldSchema{
                                        Name: fmt.Sprintf("string_field_%d", col),
                                        Type: fieldTypeString,
                                }
                        }
                        schema = &bqtypes.TableSchema{Fields: generated}
                }
        }

        rows := make([]map[string]any, len(body))
        for r, record := range body {
                row := make(map[string]any, len(schema.Fields))
                for c := range schema.Fields {
                        field := schema.Fields[c]
                        var cell any // stays nil for columns missing from a short record
                        if c < len(record) {
                                cell = coerceCSVCell(record[c], field.Type)
                        }
                        row[field.Name] = cell
                }
                rows[r] = row
        }
        return ParsedRows{Schema: schema, Rows: rows}, nil
}

// inferSchemaFromCSVHeader builds a schema with one field per header cell.
// Field names are the trimmed header cells; each type is inferred from the
// values found in the matching column of dataRows.
func inferSchemaFromCSVHeader(header []string, dataRows [][]string) *bqtypes.TableSchema {
        inferred := make([]bqtypes.TableFieldSchema, 0, len(header))
        for col := range header {
                inferred = append(inferred, bqtypes.TableFieldSchema{
                        Name: strings.TrimSpace(header[col]),
                        Type: inferCSVColumnType(columnValues(dataRows, col)),
                })
        }
        return &bqtypes.TableSchema{Fields: inferred}
}

// columnValues collects the trimmed cell at index col from every row that is
// long enough to have one, in row order. Shorter rows are skipped.
func columnValues(rows [][]string, col int) []string {
        cells := make([]string, 0, len(rows))
        for i := range rows {
                if len(rows[i]) <= col {
                        continue
                }
                cells = append(cells, strings.TrimSpace(rows[i][col]))
        }
        return cells
}

// inferCSVColumnType guesses a column's field type from its cell values.
//
// Empty cells are ignored. The column is reported as fieldTypeInteger only
// when at least one non-empty cell exists and every non-empty cell parses as a
// base-10 int64. In every other case, including a column with no values or
// with only empty cells, it is reported as fieldTypeString.
func inferCSVColumnType(values []string) string {
        sawValue := false
        for _, v := range values {
                if v == "" {
                        continue
                }
                if _, err := strconv.ParseInt(v, 10, 64); err != nil {
                        return fieldTypeString
                }
                sawValue = true
        }
        if !sawValue {
                // Nothing but empty cells gives no evidence of a numeric column, so
                // fall back to the type that can hold any value.
                return fieldTypeString
        }
        return fieldTypeInteger
}

// coerceCSVCell converts one raw CSV cell into the value stored for a column
// of the given field type.
//
// The cell is trimmed first; an empty cell becomes nil. The field type is
// matched case-insensitively. Integer, float, boolean, timestamp and DATETIME
// columns yield int, float64, bool and normalised string values respectively.
// A cell that cannot be converted for its column type, or a column of any
// other type, yields the trimmed text unchanged.
func coerceCSVCell(raw string, fieldType string) any {
        cell := strings.TrimSpace(raw)
        if cell == "" {
                return nil
        }

        switch strings.ToUpper(strings.TrimSpace(fieldType)) {
        case fieldTypeInteger, "INT64":
                n, err := strconv.ParseInt(cell, 10, 64)
                if err != nil {
                        return cell
                }
                return int(n)
        case fieldTypeFloat, "FLOAT64":
                f, err := strconv.ParseFloat(cell, 64)
                if err != nil {
                        return cell
                }
                return f
        case fieldTypeBoolean, "BOOL":
                switch strings.ToLower(cell) {
                case "true", "t", "1", "yes":
                        return true
                case "false", "f", "0", "no":
                        return false
                }
                return cell
        case fieldTypeTimestamp:
                formatted, ok := parseCSVDateTime(cell, true)
                if !ok {
                        return cell
                }
                return formatted
        case "DATETIME":
                formatted, ok := parseCSVDateTime(cell, false)
                if !ok {
                        return cell
                }
                return formatted
        }
        return cell
}

// parseCSVDateTime tries a fixed list of layouts against a CSV cell: RFC3339
// with and without fractional seconds, then space- and T-separated wall-clock
// forms, some with a literal " UTC" suffix. It reports whether any matched.
//
// The parsed instant is always converted to UTC. With keepZone it is rendered
// as RFC3339Nano; without it, as "2006-01-02T15:04:05.999999" with no zone
// suffix. On failure it returns "" and false.
func parseCSVDateTime(raw string, keepZone bool) (string, bool) {
        outLayout := "2006-01-02T15:04:05.999999"
        if keepZone {
                outLayout = time.RFC3339Nano
        }

        inLayouts := [...]string{
                time.RFC3339Nano,
                time.RFC3339,
                "2006-01-02 15:04:05.999999 UTC",
                "2006-01-02 15:04:05 UTC",
                "2006-01-02 15:04:05.999999",
                "2006-01-02 15:04:05",
                "2006-01-02T15:04:05",
        }
        for i := range inLayouts {
                parsed, err := time.Parse(inLayouts[i], raw)
                if err != nil {
                        continue
                }
                return parsed.UTC().Format(outLayout), true
        }
        return "", false
}

// parseNDJSON decodes newline-delimited JSON, one object per non-blank line.
//
// Every line is decoded before the schema is considered, so a malformed line
// is reported first. When schema is nil or has no fields, the schema is
// inferred from the decoded rows if autodetect is set; otherwise an error is
// returned.
func parseNDJSON(data []byte, schema *bqtypes.TableSchema, autodetect bool) (ParsedRows, error) {
        rawLines := bytes.Split(bytes.TrimSpace(data), []byte("\n"))
        rows := make([]map[string]any, 0, len(rawLines))
        for i := range rawLines {
                if len(bytes.TrimSpace(rawLines[i])) == 0 {
                        continue
                }
                var rec map[string]any
                if err := json.Unmarshal(rawLines[i], &rec); err != nil {
                        return ParsedRows{}, fmt.Errorf("parse JSON line: %w", err)
                }
                rows = append(rows, rec)
        }

        haveSchema := schema != nil && len(schema.Fields) > 0
        switch {
        case haveSchema:
                // Caller-supplied schema wins; nothing to infer.
        case autodetect:
                schema = inferSchemaFromRows(rows)
        default:
                return ParsedRows{}, errors.New("load job requires schema or autodetect=true for JSON")
        }
        return ParsedRows{Schema: schema, Rows: rows}, nil
}

// inferSchemaFromRows builds a schema containing every key that appears in
// rows, each typed as fieldTypeString.
//
// Fields are ordered by the first row in which a key appears; keys first seen
// in the same row are ordered alphabetically. Map iteration order is random in
// Go, so without that sort the inferred column order would change from run to
// run. An empty rows slice yields an empty schema.
func inferSchemaFromRows(rows []map[string]any) *bqtypes.TableSchema {
        if len(rows) == 0 {
                return &bqtypes.TableSchema{}
        }
        seen := make(map[string]struct{})
        var order []string
        for _, row := range rows {
                firstNew := len(order)
                for k := range row {
                        if _, dup := seen[k]; dup {
                                continue
                        }
                        seen[k] = struct{}{}
                        order = append(order, k)
                }
                // Sort only the names this row introduced so earlier rows keep
                // precedence. A small in-place insertion sort is enough here
                // because a single row rarely introduces many new keys.
                fresh := order[firstNew:]
                for i := 1; i < len(fresh); i++ {
                        for j := i; j > 0 && fresh[j] < fresh[j-1]; j-- {
                                fresh[j], fresh[j-1] = fresh[j-1], fresh[j]
                        }
                }
        }
        fields := make([]bqtypes.TableFieldSchema, 0, len(order))
        for _, name := range order {
                fields = append(fields, bqtypes.TableFieldSchema{Name: name, Type: fieldTypeString})
        }
        return &bqtypes.TableSchema{Fields: fields}
}

package load

import (
        "bytes"
        "encoding/json"
        "errors"
        "fmt"

        goavro "github.com/linkedin/goavro/v2"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// parseAvro decodes an Avro Object Container File into rows.
//
// When schema is nil or has no fields, the schema is derived from the Avro
// schema embedded in the file. Without autodetect that derivation happens up
// front and a failure is returned as an error. With autodetect it happens
// after the rows are read, and a failure falls back to inferring the schema
// from the decoded rows.
//
// Any error recorded by the OCF reader while scanning, such as a corrupt or
// truncated block, is returned instead of a partial result.
func parseAvro(data []byte, schema *bqtypes.TableSchema, autodetect bool) (ParsedRows, error) {
        ocf, err := goavro.NewOCFReader(bytes.NewReader(data))
        if err != nil {
                return ParsedRows{}, fmt.Errorf("parse Avro OCF: %w", err)
        }

        avroSchema := ocf.Codec().Schema()
        if (schema == nil || len(schema.Fields) == 0) && !autodetect {
                schema, err = avroJSONSchemaToBQ(avroSchema)
                if err != nil {
                        return ParsedRows{}, fmt.Errorf("parse Avro schema: %w", err)
                }
        }

        rows := make([]map[string]any, 0)
        for ocf.Scan() {
                rec, readErr := ocf.Read()
                if readErr != nil {
                        return ParsedRows{}, fmt.Errorf("read Avro row: %w", readErr)
                }
                row, ok := rec.(map[string]any)
                if !ok {
                        return ParsedRows{}, fmt.Errorf("read Avro row: unexpected type %T", rec)
                }
                rows = append(rows, normalizeAvroRow(row))
        }
        // Scan returns false both at end of file and on failure; only Err tells
        // the two apart.
        if scanErr := ocf.Err(); scanErr != nil {
                return ParsedRows{}, fmt.Errorf("read Avro row: %w", scanErr)
        }

        if schema == nil || len(schema.Fields) == 0 {
                schema, err = avroJSONSchemaToBQ(avroSchema)
                if err != nil {
                        schema = inferSchemaFromRows(rows)
                }
        }
        return ParsedRows{Schema: schema, Rows: rows}, nil
}

// normalizeAvroRow returns a new map holding every value of row passed
// through avroValueToAny. A nil row yields an empty, non-nil map.
func normalizeAvroRow(row map[string]any) map[string]any {
        normalized := make(map[string]any, len(row))
        for key := range row {
                normalized[key] = avroValueToAny(row[key])
        }
        return normalized
}

// avroValueToAny recursively copies a decoded Avro value, turning []byte
// into string. Maps and slices are rebuilt with their elements converted the
// same way; any other value is returned unchanged.
func avroValueToAny(v any) any {
        if raw, ok := v.([]byte); ok {
                return string(raw)
        }
        if obj, ok := v.(map[string]any); ok {
                converted := make(map[string]any, len(obj))
                for key := range obj {
                        converted[key] = avroValueToAny(obj[key])
                }
                return converted
        }
        if list, ok := v.([]any); ok {
                converted := make([]any, len(list))
                for i := range list {
                        converted[i] = avroValueToAny(list[i])
                }
                return converted
        }
        return v
}

// avroJSONSchemaToBQ decodes an Avro schema given as JSON text and converts
// its root node into a table schema via avroTypeToBQFields.
func avroJSONSchemaToBQ(schemaJSON string) (*bqtypes.TableSchema, error) {
        var decoded any
        decodeErr := json.Unmarshal([]byte(schemaJSON), &decoded)
        if decodeErr != nil {
                return nil, fmt.Errorf("decode Avro schema JSON: %w", decodeErr)
        }

        fields, convErr := avroTypeToBQFields(decoded)
        if convErr != nil {
                return nil, convErr
        }
        return &bqtypes.TableSchema{Fields: fields}, nil
}

// avroTypeToBQFields converts a decoded Avro schema node into a field list.
//
// A record node (a JSON object whose "type" is "record") yields one field per
// entry of its "fields" array, in order. Any other node is converted as a
// single field with an empty name.
func avroTypeToBQFields(node any) ([]bqtypes.TableFieldSchema, error) {
        rec, _ := node.(map[string]any)
        if kind, _ := rec["type"].(string); kind != "record" {
                single, err := avroFieldTypeToBQ("", node)
                if err != nil {
                        return nil, err
                }
                return []bqtypes.TableFieldSchema{single}, nil
        }

        entries, ok := rec["fields"].([]any)
        if !ok {
                return nil, errors.New("avro record missing fields")
        }
        fields := make([]bqtypes.TableFieldSchema, 0, len(entries))
        for i := range entries {
                entry, isObject := entries[i].(map[string]any)
                if !isObject {
                        return nil, errors.New("avro field entry has unexpected shape")
                }
                fieldName, _ := entry["name"].(string)
                if fieldName == "" {
                        return nil, errors.New("avro field missing name")
                }
                converted, err := avroFieldTypeToBQ(fieldName, entry["type"])
                if err != nil {
                        return nil, err
                }
                fields = append(fields, converted)
        }
        return fields, nil
}

// avroFieldTypeToBQ resolves an Avro type node with avroTypeNode and wraps
// the result in a field carrying the given name. On error the zero-value
// field is returned together with the error.
func avroFieldTypeToBQ(name string, node any) (bqtypes.TableFieldSchema, error) {
        var field bqtypes.TableFieldSchema
        typ, mode, nested, err := avroTypeNode(node)
        if err == nil {
                field = bqtypes.TableFieldSchema{Name: name, Type: typ, Mode: mode, Fields: nested}
        }
        return field, err
}

// avroTypeNode resolves one decoded Avro type node to a field type, mode and
// nested field list.
//
//   - A string is a primitive type name, mapped by avroPrimitiveToBQ.
//   - An array is a union. "null" members are skipped and exactly one other
//     member must remain; that member's result is returned unchanged, so a
//     nullable union does not alter the mode.
//   - An object is dispatched on its "type" string: "array" resolves the
//     "items" node and marks the field repeated, "record" converts the nested
//     fields, and any other name is treated as a primitive.
//
// Anything else is reported as unsupported.
func avroTypeNode(node any) (typ, mode string, nested []bqtypes.TableFieldSchema, err error) {
        switch n := node.(type) {
        case string:
                return avroPrimitiveToBQ(n), "", nil, nil

        case []any:
                var member any
                for _, alt := range n {
                        if name, isName := alt.(string); isName && name == "null" {
                                continue
                        }
                        if member != nil {
                                return "", "", nil, fmt.Errorf("unsupported Avro union: %v", n)
                        }
                        member = alt
                }
                if member == nil {
                        return "", "", nil, errors.New("avro union has no non-null member")
                }
                return avroTypeNode(member)

        case map[string]any:
                kind, ok := n["type"].(string)
                if !ok {
                        return "", "", nil, fmt.Errorf("unsupported Avro type map: %v", n)
                }
                if kind == "array" {
                        elemTyp, _, elemNested, elemErr := avroTypeNode(n["items"])
                        if elemErr != nil {
                                return "", "", nil, elemErr
                        }
                        return elemTyp, fieldModeRepeated, elemNested, nil
                }
                if kind == "record" {
                        recFields, recErr := avroTypeToBQFields(n)
                        if recErr != nil {
                                return "", "", nil, recErr
                        }
                        return fieldTypeRecord, "", recFields, nil
                }
                return avroPrimitiveToBQ(kind), "", nil, nil
        }
        return "", "", nil, fmt.Errorf("unsupported Avro type node: %T", node)
}

// avroPrimitiveToBQ maps an Avro primitive type name to a field type.
// Names it does not recognise map to fieldTypeString.
func avroPrimitiveToBQ(avroType string) string {
        switch {
        case avroType == "boolean":
                return fieldTypeBoolean
        case avroType == "int" || avroType == "long":
                return fieldTypeInteger
        case avroType == "float" || avroType == "double":
                return fieldTypeFloat
        case avroType == "bytes" || avroType == "fixed":
                return "BYTES"
        }
        return fieldTypeString
}

package load

import (
        "bytes"
        "context"
        "encoding/json"
        "errors"
        "fmt"
        "regexp"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
)

// datastoreOutputFileRE matches "output-<digits>" tokens. It is used to pick
// the names of entity output files out of the raw export metadata bytes.
var datastoreOutputFileRE = regexp.MustCompile(`output-\d+`)

// parseDatastoreBackupSources loads a DATASTORE_BACKUP / Firestore export
// referenced by a *.export_metadata URI, fetches the companion output-* entity
// files, and decodes enough entity properties for the public samples.
//
// Only cfg.SourceURIs[0] is fetched as the metadata object. If its content
// starts with '{' or '[' it is parsed directly as JSON and counted as one
// file. Otherwise every distinct "output-<n>" name mentioned in the metadata
// (or "output-0" when none is mentioned) is fetched from the same directory
// as the metadata object and parsed.
//
// It returns the parsed rows, the total number of bytes fetched, and the
// number of files read. Output files that cannot be fetched are skipped. When
// none can be fetched, the first fetch error is wrapped in the returned error
// so the cause is not lost. A parse failure in any fetched file aborts the
// load.
func parseDatastoreBackupSources(ctx context.Context, cfg *jobs.JobConfigurationLoad,
        parseSchema *bqtypes.TableSchema,
) (ParsedRows, int64, int, error) {
        if cfg == nil || len(cfg.SourceURIs) == 0 {
                return ParsedRows{}, 0, 0, errors.New("DATASTORE_BACKUP requires sourceUris")
        }
        metaURI := cfg.SourceURIs[0]
        metaBytes, err := FetchSource(ctx, metaURI)
        if err != nil {
                return ParsedRows{}, 0, 0, err
        }
        totalBytes := int64(len(metaBytes))

        trimmed := bytes.TrimSpace(metaBytes)
        if len(trimmed) > 0 && (trimmed[0] == '{' || trimmed[0] == '[') {
                parsed, err := parseDatastoreJSON(trimmed, parseSchema)
                return parsed, totalBytes, 1, err
        }

        baseDir := datastoreBackupBaseDir(metaURI)
        outputs := uniqueStrings(datastoreOutputFileRE.FindAllString(string(metaBytes), -1))
        if len(outputs) == 0 {
                outputs = []string{"output-0"}
        }

        var (
                parsed        ParsedRows
                filesRead     int
                firstFetchErr error
        )
        for _, name := range outputs {
                data, ferr := FetchSource(ctx, baseDir+name)
                if ferr != nil {
                        // Best effort: the metadata may mention names that do not
                        // exist as objects. Remember the first failure for reporting.
                        if firstFetchErr == nil {
                                firstFetchErr = ferr
                        }
                        continue
                }
                totalBytes += int64(len(data))
                filesRead++
                chunk, perr := parseDatastoreEntityBytes(data, parseSchema)
                if perr != nil {
                        return ParsedRows{}, 0, 0, perr
                }
                parsed = mergeParsedChunk(parsed, chunk, filesRead == 1)
        }
        if filesRead == 0 {
                // outputs is never empty, so reaching here means every fetch failed
                // and firstFetchErr is set.
                return ParsedRows{}, 0, 0, fmt.Errorf(
                        "DATASTORE_BACKUP: no output files found for %q: %w", metaURI, firstFetchErr)
        }
        return parsed, totalBytes, filesRead, nil
}

// datastoreBackupBaseDir returns uri up to and including its last "/".
// A uri without any "/" yields the empty string.
func datastoreBackupBaseDir(uri string) string {
        slash := strings.LastIndex(uri, "/")
        if slash < 0 {
                return ""
        }
        return uri[:slash+1]
}

// uniqueStrings returns the non-empty strings of in with duplicates removed,
// keeping the first occurrence of each in its original position. An empty
// input yields nil.
func uniqueStrings(in []string) []string {
        if len(in) == 0 {
                return nil
        }
        kept := make([]string, 0, len(in))
        taken := make(map[string]bool, len(in))
        for i := range in {
                s := in[i]
                if s == "" || taken[s] {
                        continue
                }
                taken[s] = true
                kept = append(kept, s)
        }
        return kept
}

// parseDatastoreJSON parses a JSON rendition of a Datastore backup.
//
// Three shapes are accepted, chosen by inspecting the data:
//   - a top-level array, decoded as a list of row objects used as-is;
//   - an object with an "entities" array, whose object elements are each
//     flattened into a row (non-object elements are dropped);
//   - any other object, flattened into a single row.
func parseDatastoreJSON(data []byte, schema *bqtypes.TableSchema) (ParsedRows, error) {
        var rows []map[string]any
        if len(data) > 0 && data[0] == '[' {
                if err := json.Unmarshal(data, &rows); err != nil {
                        return ParsedRows{}, fmt.Errorf("parse DATASTORE_BACKUP JSON array: %w", err)
                }
                return finalizeDatastoreRows(rows, schema), nil
        }

        var doc map[string]any
        if err := json.Unmarshal(data, &doc); err != nil {
                return ParsedRows{}, fmt.Errorf("parse DATASTORE_BACKUP JSON: %w", err)
        }

        entities, wrapped := doc["entities"].([]any)
        if !wrapped {
                single := []map[string]any{flattenDatastoreEntity(doc)}
                return finalizeDatastoreRows(single, schema), nil
        }

        rows = make([]map[string]any, 0, len(entities))
        for i := range entities {
                entity, isObject := entities[i].(map[string]any)
                if !isObject {
                        continue
                }
                rows = append(rows, flattenDatastoreEntity(entity))
        }
        return finalizeDatastoreRows(rows, schema), nil
}

// flattenDatastoreEntity turns an entity object into a flat row. When the
// entity has an object-valued "properties" key, the row holds each property
// unwrapped by unwrapDatastoreValue; otherwise the entity itself is returned.
func flattenDatastoreEntity(ent map[string]any) map[string]any {
        props, hasProps := ent["properties"].(map[string]any)
        if !hasProps {
                return ent
        }
        flat := make(map[string]any, len(props))
        for name, wrapped := range props {
                flat[name] = unwrapDatastoreValue(wrapped)
        }
        return flat
}

// unwrapDatastoreValue extracts the payload of a typed value wrapper such as
// {"stringValue": "x"}. The known wrapper keys are checked in a fixed order
// and the first one present wins; an "integerValue" payload is additionally
// passed through unwrapDatastoreInteger. Values that are not objects, or that
// carry none of the known keys, are returned unchanged.
func unwrapDatastoreValue(v any) any {
        wrapper, isWrapper := v.(map[string]any)
        if !isWrapper {
                return v
        }
        typedKeys := [...]string{
                "stringValue", "integerValue", "doubleValue", "booleanValue",
                "timestampValue", "nullValue",
        }
        for i := range typedKeys {
                payload, present := wrapper[typedKeys[i]]
                switch {
                case !present:
                        continue
                case typedKeys[i] == "integerValue":
                        return unwrapDatastoreInteger(payload)
                default:
                        return payload
                }
        }
        return v
}

// unwrapDatastoreInteger converts an integer payload given as a base-10
// string into an int. Payloads that are not strings, or that do not parse as
// an int64, are returned unchanged.
func unwrapDatastoreInteger(raw any) any {
        if text, isText := raw.(string); isText {
                if n, err := strconv.ParseInt(text, 10, 64); err == nil {
                        return int(n)
                }
        }
        return raw
}

// parseDatastoreEntityBytes parses one entity output file. Content whose
// first non-space byte is '{' or '[' is handed to parseDatastoreJSON; anything
// else is scanned with scanDatastoreEntities.
func parseDatastoreEntityBytes(data []byte, schema *bqtypes.TableSchema) (ParsedRows, error) {
        body := bytes.TrimSpace(data)
        looksJSON := len(body) > 0 && (body[0] == '{' || body[0] == '[')
        if !looksJSON {
                return finalizeDatastoreRows(scanDatastoreEntities(data), schema), nil
        }
        return parseDatastoreJSON(body, schema)
}

// finalizeDatastoreRows pairs rows with schema, inferring the schema from the
// rows when none (or one without fields) was supplied.
func finalizeDatastoreRows(rows []map[string]any, schema *bqtypes.TableSchema) ParsedRows {
        result := ParsedRows{Schema: schema, Rows: rows}
        if schema == nil || len(schema.Fields) == 0 {
                result.Schema = inferSchemaFromRows(rows)
        }
        return result
}

// scanDatastoreEntities heuristically extracts Firestore/Datastore entity
// properties from LevelDB-encoded export output files. Enough for the public
// us-states backup sample (name, post_abbr, year).
//
// Starting at offset i, it reads a "name" string property, then a "post_abbr"
// string property, then a "year" varint property, each search beginning where
// the previous read ended. A row is emitted only when name is non-empty;
// post_abbr and year are left out of the row when empty or zero.
//
// The scan ends as soon as no "name" property can be read from the current
// offset. When "post_abbr" or "year" cannot be read, the offset advances by a
// single byte and the scan is retried from there.
func scanDatastoreEntities(data []byte) []map[string]any {
        var rows []map[string]any
        for i := 0; i < len(data); {
                name, next := readDatastoreStringProp(data, i, "name")
                if next < 0 {
                        // No readable name at or after i: stop scanning entirely.
                        break
                }
                abbr, next := readDatastoreStringProp(data, next, "post_abbr")
                if next < 0 {
                        // Incomplete entity: retry one byte further on.
                        i++
                        continue
                }
                year, next := readDatastoreVarintProp(data, next, "year")
                if next < 0 {
                        // Incomplete entity: retry one byte further on.
                        i++
                        continue
                }
                if name != "" {
                        row := map[string]any{datastorePropName: name}
                        if abbr != "" {
                                row["post_abbr"] = abbr
                        }
                        if year != 0 {
                                row["year"] = year
                        }
                        rows = append(rows, row)
                }
                // Resume after the last property consumed for this entity.
                i = next
        }
        return rows
}

// readDatastoreStringProp looks for the marker of prop at or after start and
// reads the string that follows it.
//
// After the marker, space and zero bytes are skipped. The next byte must be
// 0x1a, followed by a single length byte and that many bytes of string data.
// It returns the string and the offset just past it, or "" and -1 when the
// marker is unknown, not found, or not followed by that layout.
func readDatastoreStringProp(data []byte, start int, prop string) (string, int) {
        marker := datastorePropMarker(prop)
        if marker == nil {
                return "", -1
        }
        off := bytes.Index(data[start:], marker)
        if off < 0 {
                return "", -1
        }

        cur := start + off + len(marker)
        for ; cur < len(data); cur++ {
                if c := data[cur]; c != ' ' && c != 0 {
                        break
                }
        }
        if cur >= len(data) || data[cur] != 0x1a {
                return "", -1
        }
        cur++
        if cur >= len(data) {
                return "", -1
        }

        size := int(data[cur])
        cur++
        end := cur + size
        if end > len(data) {
                return "", -1
        }
        return string(data[cur:end]), end
}

// readDatastoreVarintProp looks for the marker of prop at or after start and
// reads the integer that follows it.
//
// After the marker, space and zero bytes are skipped. The next byte must be
// 0x08, followed by a little-endian base-128 varint: seven payload bits per
// byte, with the high bit set on every byte except the last. The accumulated
// value is converted with uint64ToSignedInt64. It returns the value and the
// offset just past it, or 0 and -1 when the marker is unknown, not found, not
// followed by that layout, or the data ends before the varint terminates.
func readDatastoreVarintProp(data []byte, start int, prop string) (int64, int) {
        marker := datastorePropMarker(prop)
        if marker == nil {
                return 0, -1
        }
        off := bytes.Index(data[start:], marker)
        if off < 0 {
                return 0, -1
        }

        cur := start + off + len(marker)
        for ; cur < len(data); cur++ {
                if c := data[cur]; c != ' ' && c != 0 {
                        break
                }
        }
        if cur >= len(data) || data[cur] != 0x08 {
                return 0, -1
        }
        cur++

        var acc uint64
        for shift := 0; cur < len(data); shift += 7 {
                b := data[cur]
                cur++
                acc |= uint64(b&0x7f) << shift
                if b < 0x80 {
                        return uint64ToSignedInt64(acc), cur
                }
        }
        return 0, -1
}

package load

import (
        "bytes"
        "errors"
        "fmt"
        "io"
        "strings"

        "github.com/scritchley/orc"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// parseORC decodes an ORC file into rows keyed by top-level column name.
//
// The data must start with the "ORC" magic bytes and declare at least one
// column. When schema is nil or has no fields, the schema is derived from the
// ORC type description. Without autodetect that happens before the rows are
// read; with autodetect it happens afterwards and falls back to inferring the
// schema from the rows if the derived schema is empty.
//
// A panic raised while decoding is recovered and reported as an error with an
// empty result. An error recorded by the cursor during iteration is returned
// instead of a partial result.
func parseORC(data []byte, schema *bqtypes.TableSchema, autodetect bool) (parsed ParsedRows, err error) {
        defer func() {
                if r := recover(); r != nil {
                        // Never hand back a half-built result next to an error.
                        parsed = ParsedRows{}
                        err = fmt.Errorf("parse ORC: %v", r)
                }
        }()
        if len(data) < 3 || string(data[:3]) != "ORC" {
                return ParsedRows{}, errors.New("parse ORC: invalid ORC file header")
        }
        r, err := orc.NewReader(bytes.NewReader(data))
        if err != nil {
                return ParsedRows{}, fmt.Errorf("parse ORC: %w", err)
        }
        defer func() { _ = r.Close() }()

        orcSchema := r.Schema()
        cols := orcSchema.Columns()
        if len(cols) == 0 {
                return ParsedRows{}, errors.New("parse ORC: empty schema")
        }

        if (schema == nil || len(schema.Fields) == 0) && !autodetect {
                schema = orcSchemaToBQ(orcSchema)
        }

        cur := r.Select(cols...)
        rows := make([]map[string]any, 0)
        for cur.Stripes() {
                for cur.Next() {
                        vals := cur.Row()
                        row := make(map[string]any, len(cols))
                        for i, col := range cols {
                                if i < len(vals) {
                                        row[col] = orcValueToAny(vals[i])
                                } else {
                                        row[col] = nil
                                }
                        }
                        rows = append(rows, row)
                }
        }
        // Stripes and Next return false on failure as well as at the end of the
        // data. io.EOF is tolerated in case the cursor uses it to mark a normal
        // end of input.
        if cerr := cur.Err(); cerr != nil && !errors.Is(cerr, io.EOF) {
                return ParsedRows{}, fmt.Errorf("parse ORC: %w", cerr)
        }

        if schema == nil || len(schema.Fields) == 0 {
                schema = orcSchemaToBQ(orcSchema)
                if schema == nil || len(schema.Fields) == 0 {
                        schema = inferSchemaFromRows(rows)
                }
        }
        return ParsedRows{Schema: schema, Rows: rows}, nil
}

// orcSchemaToBQ converts an ORC type description into a table schema.
// A nil description yields an empty schema.
func orcSchemaToBQ(td *orc.TypeDescription) *bqtypes.TableSchema {
        schema := &bqtypes.TableSchema{}
        if td != nil {
                schema.Fields = orcFieldsToBQ(td)
        }
        return schema
}

// orcFieldsToBQ converts each column of td into a field, in column order.
// Columns whose type description cannot be looked up are left out.
func orcFieldsToBQ(td *orc.TypeDescription) []bqtypes.TableFieldSchema {
        names := td.Columns()
        fields := make([]bqtypes.TableFieldSchema, 0, len(names))
        for i := range names {
                if child, err := td.GetField(names[i]); err == nil {
                        fields = append(fields, orcFieldToBQ(names[i], child))
                }
        }
        return fields
}

// orcFieldToBQ builds the field for one named ORC column by translating the
// string form of its type description with orcTypeStringToBQ.
func orcFieldToBQ(name string, td *orc.TypeDescription) bqtypes.TableFieldSchema {
        field := bqtypes.TableFieldSchema{Name: name}
        field.Type, field.Mode, field.Fields = orcTypeStringToBQ(td.String())
        return field
}

// orcTypeStringToBQ translates the textual form of an ORC type into a field
// type, mode and nested field list.
//
//   - "struct<...>" becomes a record whose nested fields are parsed from the
//     struct body.
//   - "array<T>" becomes a repeated field of T's type. T's nested fields are
//     kept, so an array of structs stays a repeated record with its fields.
//   - Scalar names are mapped after any "(...)" parameter list is removed, so
//     "decimal(10,2)" is handled like "decimal" and "varchar(20)" like
//     "varchar".
//
// Unrecognised types map to fieldTypeString.
func orcTypeStringToBQ(typeStr string) (typ, mode string, nested []bqtypes.TableFieldSchema) {
        typeStr = strings.TrimSpace(typeStr)
        if strings.HasPrefix(typeStr, "struct<") && strings.HasSuffix(typeStr, ">") {
                inner := strings.TrimSuffix(strings.TrimPrefix(typeStr, "struct<"), ">")
                return fieldTypeRecord, "", parseORCStructFields(inner)
        }
        if strings.HasPrefix(typeStr, "array<") && strings.HasSuffix(typeStr, ">") {
                inner := strings.TrimSuffix(strings.TrimPrefix(typeStr, "array<"), ">")
                elemTyp, _, elemNested := orcTypeStringToBQ(inner)
                return elemTyp, fieldModeRepeated, elemNested
        }

        // Drop precision/scale/length parameters before matching the type name.
        base := typeStr
        if paren := strings.Index(base, "("); paren >= 0 {
                base = strings.TrimSpace(base[:paren])
        }
        switch base {
        case "boolean":
                return fieldTypeBoolean, "", nil
        case "tinyint", "smallint", "int", "bigint":
                return fieldTypeInteger, "", nil
        case "float", "double":
                return fieldTypeFloat, "", nil
        case "string", "varchar", "char":
                return fieldTypeString, "", nil
        case "binary":
                return "BYTES", "", nil
        case "date", "timestamp":
                return fieldTypeTimestamp, "", nil
        case "decimal":
                return "NUMERIC", "", nil
        default:
                return fieldTypeString, "", nil
        }
}

// parseORCStructFields parses the body of an ORC "struct<...>" type, a
// comma-separated list of "name:type" entries, into fields. Entries without a
// colon, or with nothing before it, are skipped.
func parseORCStructFields(inner string) []bqtypes.TableFieldSchema {
        entries := splitORCStructFields(inner)
        fields := make([]bqtypes.TableFieldSchema, 0, len(entries))
        for i := range entries {
                entry := entries[i]
                sep := strings.Index(entry, ":")
                if sep <= 0 {
                        continue
                }
                field := bqtypes.TableFieldSchema{Name: strings.TrimSpace(entry[:sep])}
                field.Type, field.Mode, field.Fields = orcTypeStringToBQ(strings.TrimSpace(entry[sep+1:]))
                fields = append(fields, field)
        }
        return fields
}

// splitORCStructFields splits the body of an ORC "struct<...>" type into its
// "name:type" entries.
//
// Only commas at nesting depth zero separate entries. Both angle brackets
// (nested struct/array/map types) and parentheses (type parameters such as
// "decimal(10,2)") open a nesting level, so commas inside them are kept as
// part of the entry. An empty body yields no entries.
func splitORCStructFields(inner string) []string {
        var parts []string
        depth := 0
        start := 0
        for i, ch := range inner {
                switch ch {
                case '<', '(':
                        depth++
                case '>', ')':
                        depth--
                case ',':
                        if depth == 0 {
                                parts = append(parts, inner[start:i])
                                start = i + 1
                        }
                }
        }
        if start < len(inner) {
                parts = append(parts, inner[start:])
        }
        return parts
}

// orcValueToAny recursively copies a decoded ORC value, turning []byte into
// string. Maps and slices are rebuilt with their elements converted the same
// way; any other value is returned unchanged.
func orcValueToAny(v any) any {
        if raw, ok := v.([]byte); ok {
                return string(raw)
        }
        if obj, ok := v.(map[string]any); ok {
                converted := make(map[string]any, len(obj))
                for key := range obj {
                        converted[key] = orcValueToAny(obj[key])
                }
                return converted
        }
        if list, ok := v.([]any); ok {
                converted := make([]any, len(list))
                for i := range list {
                        converted[i] = orcValueToAny(list[i])
                }
                return converted
        }
        return v
}

package load

import (
        "bytes"
        "errors"
        "fmt"
        "io"
        "time"

        "github.com/parquet-go/parquet-go"
        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// parseParquet decodes a Parquet file into rows.
//
// When schema is nil or has no fields and autodetect is off, the schema is
// taken from the Parquet file schema before reading. If no usable schema is
// available once the rows are read, it is inferred from the rows. If that
// yields no fields either (for example a file without rows), the Parquet file
// schema is used so the column definitions are not lost.
func parseParquet(data []byte, schema *bqtypes.TableSchema, autodetect bool) (ParsedRows, error) {
        f, err := parquet.OpenFile(bytes.NewReader(data), int64(len(data)))
        if err != nil {
                return ParsedRows{}, fmt.Errorf("parse Parquet: %w", err)
        }

        if (schema == nil || len(schema.Fields) == 0) && !autodetect {
                schema = parquetFileSchemaToBQ(f.Schema())
        }

        reader := parquet.NewReader(f)
        defer func() { _ = reader.Close() }()

        rows := make([]map[string]any, 0, f.NumRows())
        for {
                row := make(map[string]any)
                err := reader.Read(&row)
                if err != nil {
                        if errors.Is(err, io.EOF) {
                                break
                        }
                        return ParsedRows{}, fmt.Errorf("read Parquet rows: %w", err)
                }
                rows = append(rows, normalizeParquetRow(row))
        }

        if schema == nil || len(schema.Fields) == 0 {
                schema = inferSchemaFromRows(rows)
                // inferSchemaFromRows returns an empty, non-nil schema when there
                // is nothing to infer from, so test the field count rather than nil.
                if schema == nil || len(schema.Fields) == 0 {
                        schema = parquetFileSchemaToBQ(f.Schema())
                }
        }
        return ParsedRows{Schema: schema, Rows: rows}, nil
}

// normalizeParquetRow returns a new map holding every value of row passed
// through parquetValueToAny. A nil row yields an empty, non-nil map.
func normalizeParquetRow(row map[string]any) map[string]any {
        normalized := make(map[string]any, len(row))
        for key := range row {
                normalized[key] = parquetValueToAny(row[key])
        }
        return normalized
}

// parquetValueToAny recursively converts a decoded Parquet value.
//
// time.Time values (and non-nil *time.Time) become RFC3339Nano strings in
// UTC; a nil *time.Time becomes nil. Maps and slices are rebuilt with their
// elements converted. An int64 strictly between 1e12 and 1e17 is treated as
// microseconds since the Unix epoch and rendered the same way as a time.
// Every other value is returned unchanged.
func parquetValueToAny(v any) any {
        switch val := v.(type) {
        case *time.Time:
                if val == nil {
                        return nil
                }
                return parquetValueToAny(*val)
        case time.Time:
                return val.UTC().Format(time.RFC3339Nano)
        case int64:
                // Pandas/pyarrow often stores TIMESTAMP columns as INT64
                // microseconds since Unix epoch in Parquet.
                const (
                        microsLow  = 1_000_000_000_000
                        microsHigh = 100_000_000_000_000_000
                )
                if val <= microsLow || val >= microsHigh {
                        return val
                }
                return time.UnixMicro(val).UTC().Format(time.RFC3339Nano)
        case []any:
                converted := make([]any, len(val))
                for i := range val {
                        converted[i] = parquetValueToAny(val[i])
                }
                return converted
        case map[string]any:
                converted := make(map[string]any, len(val))
                for key := range val {
                        converted[key] = parquetValueToAny(val[key])
                }
                return converted
        }
        return v
}

// parquetFileSchemaToBQ converts the top-level fields of a Parquet schema
// into a table schema, in order. A nil schema yields an empty table schema.
func parquetFileSchemaToBQ(s *parquet.Schema) *bqtypes.TableSchema {
        if s == nil {
                return &bqtypes.TableSchema{}
        }
        src := s.Fields()
        converted := make([]bqtypes.TableFieldSchema, len(src))
        for i := range src {
                converted[i] = parquetFieldToBQ(src[i])
        }
        return &bqtypes.TableSchema{Fields: converted}
}

// parquetFieldToBQ converts one Parquet field, recursing into child fields
// when the field maps to a record.
//
// The mode is repeated for repeated fields, required for required fields
// (repeated takes precedence) and empty otherwise.
func parquetFieldToBQ(f parquet.Field) bqtypes.TableFieldSchema {
        field := bqtypes.TableFieldSchema{Name: f.Name(), Type: parquetNodeTypeToBQ(f)}
        switch {
        case f.Repeated():
                field.Mode = fieldModeRepeated
        case f.Required():
                field.Mode = fieldModeRequired
        }

        if children := f.Fields(); len(children) > 0 && field.Type == fieldTypeRecord {
                field.Fields = make([]bqtypes.TableFieldSchema, 0, len(children))
                for i := range children {
                        field.Fields = append(field.Fields, parquetFieldToBQ(children[i]))
                }
        }
        return field
}

// parquetNodeTypeToBQ picks the BigQuery type name for a Parquet field.
// Any field with children is a record. Leaf fields are classified by the
// string form of their Parquet type; names not listed below fall back to
// the logical type annotation (TIMESTAMP) and finally to the string type.
func parquetNodeTypeToBQ(f parquet.Field) string {
        if children := f.Fields(); len(children) != 0 {
                return fieldTypeRecord
        }

        nodeType := f.Type()
        switch nodeType.String() {
        case "FLOAT", "DOUBLE":
                return fieldTypeFloat
        case "INT32", "INT64", "UINT32", "UINT64":
                return fieldTypeInteger
        case fieldTypeBoolean:
                return fieldTypeBoolean
        case fieldTypeTimestamp, "INT96":
                return fieldTypeTimestamp
        case "BYTE_ARRAY", "FIXED_LEN_BYTE_ARRAY":
                return fieldTypeString
        }

        // Unrecognized type name: honor a TIMESTAMP logical annotation if
        // one is attached, otherwise treat the column as a string.
        logical := nodeType.LogicalType()
        if logical != nil && logical.String() == fieldTypeTimestamp {
                return fieldTypeTimestamp
        }
        return fieldTypeString
}

package load

import (
        "bytes"
        "context"
        "fmt"
        "io"
        "net/http"
        "net/url"
        "strings"
)

// PutGCS uploads data as the object named by gsURI using a single-request
// media upload (uploadType=media) against the storage emulator base URL.
// An empty contentType defaults to application/octet-stream. Any response
// status other than 200 or 201 is returned as an error that includes up to
// 512 bytes of the response body.
func PutGCS(ctx context.Context, gsURI string, contentType string, data []byte) error {
        bucket, object, err := parseGSURI(gsURI)
        if err != nil {
                return err
        }
        if contentType == "" {
                contentType = "application/octet-stream"
        }

        endpoint := fmt.Sprintf("%s/upload/storage/v1/b/%s/o?uploadType=media&name=%s",
                storageEmulatorBase(), url.PathEscape(bucket), url.QueryEscape(object))
        req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(data))
        if err != nil {
                return err
        }
        req.Header.Set("Content-Type", contentType)

        resp, err := http.DefaultClient.Do(req)
        if err != nil {
                return fmt.Errorf("put %s: %w", gsURI, err)
        }
        defer func() { _ = resp.Body.Close() }()

        switch resp.StatusCode {
        case http.StatusOK, http.StatusCreated:
                return nil
        }
        // Only a bounded prefix of the body is read for the error message.
        snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 512))
        return fmt.Errorf("put %s: HTTP %d: %s", gsURI, resp.StatusCode, strings.TrimSpace(string(snippet)))
}

// parseGSURI splits a "gs://bucket/object" URI into its bucket and object
// name. The object name is everything after the first slash following the
// bucket and may itself contain slashes.
//
// An error is returned when the "gs://" scheme prefix is missing, or when
// either the bucket or the object name is empty.
func parseGSURI(gsURI string) (bucket, object string, err error) {
        // Require the scheme explicitly: without this check inputs such as
        // "s3://b/o" or "b/o" would be accepted and split at their first slash.
        rest, ok := strings.CutPrefix(gsURI, "gs://")
        if !ok {
                return "", "", fmt.Errorf("invalid gs:// uri: %q", gsURI)
        }
        bucket, object, found := strings.Cut(rest, "/")
        if !found || bucket == "" || object == "" {
                return "", "", fmt.Errorf("invalid gs:// uri: %q", gsURI)
        }
        return bucket, object, nil
}

package load

import (
        "context"
        "fmt"
        "slices"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
)

// schemaUpdateOptions values recognized by mergeSchemas.
const (
        // schemaUpdateAllowFieldAddition permits appending fields that are
        // present in the incoming schema but missing from the existing one.
        schemaUpdateAllowFieldAddition   = "ALLOW_FIELD_ADDITION"
        // schemaUpdateAllowFieldRelaxation permits clearing the REQUIRED mode
        // on existing fields that the incoming schema no longer requires.
        schemaUpdateAllowFieldRelaxation = "ALLOW_FIELD_RELAXATION"
)

// TablePatchSchemaOptions are the schemaUpdateOptions honored by
// tables.patch when syncing schema changes to the engine catalog.
// MergeSchemasForTablePatch always merges with this set, so a patch may
// both add fields and relax REQUIRED fields.
var TablePatchSchemaOptions = []string{
        schemaUpdateAllowFieldAddition,
        schemaUpdateAllowFieldRelaxation,
}

// existingDestinationSchema looks up the catalog schema of a destination
// table, for load jobs that omit an explicit schema and have
// autodetect=false. It returns nil when the table does not exist or when
// describing it fails; the describe error itself is not reported.
func existingDestinationSchema(ctx context.Context, catalog enginepb.CatalogClient,
        projectID, datasetID, tableID string,
) *bqtypes.TableSchema {
        ref := &enginepb.TableRef{ProjectId: projectID, DatasetId: datasetID, TableId: tableID}
        if tableExists(ctx, catalog, ref) {
                described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: ref})
                if err == nil {
                        return schemaFromProto(described.GetSchema())
                }
        }
        return nil
}

// resolveDestinationSchema merges load-time schema updates into the destination
// table schema for WRITE_APPEND jobs. When the merged schema differs from the
// engine catalog, existing rows are preserved via drop-and-recreate.
//
// The load schema is returned unchanged (converted to proto) when the write
// disposition is not append, when cfg.SchemaUpdateOptions is empty, or when
// the destination table does not exist yet. When the merge produces no
// change, the existing catalog schema is returned.
//
// The drop / register / re-insert sequence is not transactional: no rollback
// is attempted here if a step after DropTable fails.
func resolveDestinationSchema(ctx context.Context, catalog enginepb.CatalogClient,
        cfg *jobs.JobConfigurationLoad, projectID, datasetID, tableID string,
        loadSchema *bqtypes.TableSchema,
) (*enginepb.TableSchema, error) {
        // An unset write disposition is treated as append.
        wd := cfg.WriteDisposition
        if wd == "" {
                wd = writeAppend
        }
        if wd != writeAppend || len(cfg.SchemaUpdateOptions) == 0 {
                return SchemaToProto(loadSchema), nil
        }

        tableRef := &enginepb.TableRef{
                ProjectId: projectID,
                DatasetId: datasetID,
                TableId:   tableID,
        }
        // Nothing to merge with when the destination has not been created.
        if !tableExists(ctx, catalog, tableRef) {
                return SchemaToProto(loadSchema), nil
        }

        desc, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
        if err != nil {
                return nil, fmt.Errorf("describe destination table: %w", err)
        }
        existing := schemaFromProto(desc.GetSchema())
        // Prefer the schema spelled out in the job configuration; fall back
        // to the schema resolved for the load when none (or an empty one) is given.
        explicit := cfg.Schema
        if explicit == nil || len(explicit.Fields) == 0 {
                explicit = loadSchema
        }
        merged, changed, err := mergeSchemas(existing, explicit, cfg.SchemaUpdateOptions, false)
        if err != nil {
                return nil, err
        }
        if !changed {
                return SchemaToProto(existing), nil
        }

        // Read every row under the old schema before the table is dropped.
        preserved, err := listAllRows(ctx, catalog, tableRef, desc.GetSchema())
        if err != nil {
                return nil, err
        }
        protoMerged := SchemaToProto(merged)
        if _, err := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: tableRef}); err != nil {
                return nil, fmt.Errorf("schema update drop table: %w", err)
        }
        if _, err := catalog.RegisterTable(ctx, &enginepb.RegisterTableRequest{
                Table:  tableRef,
                Schema: protoMerged,
        }); err != nil {
                return nil, fmt.Errorf("schema update register table: %w", err)
        }
        // Re-insert the saved rows into the recreated table under the merged schema.
        if len(preserved) > 0 {
                ref := seed.TableRef{ProjectID: projectID, DatasetID: datasetID, TableID: tableID}
                applier := seed.NewCatalogApplier(catalog)
                if _, err := applier.InsertRows(ctx, ref, protoMerged, preserved); err != nil {
                        return nil, fmt.Errorf("schema update re-insert rows: %w", err)
                }
        }
        return protoMerged, nil
}

// MergeSchemasForAppend merges an existing table schema with a query
// result schema, honoring the given BigQuery schemaUpdateOptions. It
// returns the merged schema and whether it differs from existing. Should
// the merge report an error, existing is returned unchanged with false.
func MergeSchemasForAppend(
        existing *bqtypes.TableSchema,
        query *bqtypes.TableSchema,
        opts []string,
) (*bqtypes.TableSchema, bool) {
        if merged, changed, err := mergeSchemas(existing, query, opts, false); err == nil {
                return merged, changed
        }
        return existing, false
}

// MergeSchemasForTablePatch merges a PATCH body schema into the catalog
// schema using TablePatchSchemaOptions, so new fields are added,
// descriptions are updated and REQUIRED fields may relax to NULLABLE.
// The merge runs in strict mode: it returns an error when the patch
// changes a field's type or tightens a non-REQUIRED field to REQUIRED.
func MergeSchemasForTablePatch(
        existing *bqtypes.TableSchema,
        patch *bqtypes.TableSchema,
) (*bqtypes.TableSchema, bool, error) {
        const strict = true
        merged, changed, err := mergeSchemas(existing, patch, TablePatchSchemaOptions, strict)
        return merged, changed, err
}

// ApplySchemaUpdate merges querySchema into the destination catalog
// table when schemaUpdateOptions require field addition or relaxation,
// and returns the schema the table has afterwards.
//
// With no options or a nil querySchema, or when the merge changes
// nothing, the current catalog schema is returned untouched. Otherwise
// all existing rows are read, the table is dropped and re-registered
// with the merged schema, and the rows are inserted again. That sequence
// is not transactional: no rollback is attempted if a later step fails.
func ApplySchemaUpdate(ctx context.Context, catalog enginepb.CatalogClient,
        tableRef *enginepb.TableRef, querySchema *bqtypes.TableSchema, opts []string,
) (*enginepb.TableSchema, error) {
        described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
        if err != nil {
                return nil, fmt.Errorf("describe destination table: %w", err)
        }
        current := described.GetSchema()
        if querySchema == nil || len(opts) == 0 {
                return current, nil
        }

        merged, changed, err := mergeSchemas(schemaFromProto(current), querySchema, opts, false)
        if err != nil {
                return nil, err
        }
        if !changed {
                return current, nil
        }

        // Snapshot the rows under the old schema before dropping the table.
        rows, err := listAllRows(ctx, catalog, tableRef, current)
        if err != nil {
                return nil, err
        }
        updated := SchemaToProto(merged)
        if _, err = catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: tableRef}); err != nil {
                return nil, fmt.Errorf("schema update drop table: %w", err)
        }
        registration := &enginepb.RegisterTableRequest{Table: tableRef, Schema: updated}
        if _, err = catalog.RegisterTable(ctx, registration); err != nil {
                return nil, fmt.Errorf("schema update register table: %w", err)
        }
        if len(rows) == 0 {
                return updated, nil
        }

        target := seed.TableRef{
                ProjectID: tableRef.GetProjectId(),
                DatasetID: tableRef.GetDatasetId(),
                TableID:   tableRef.GetTableId(),
        }
        if _, err = seed.NewCatalogApplier(catalog).InsertRows(ctx, target, updated, rows); err != nil {
                return nil, fmt.Errorf("schema update re-insert rows: %w", err)
        }
        return updated, nil
}

// mergeSchemas folds the incoming schema (load) into existing according
// to opts and reports whether anything changed.
//
// A nil existing schema is treated as empty. When load is nil, or opts
// contains neither ALLOW_FIELD_ADDITION nor ALLOW_FIELD_RELAXATION,
// existing is returned as-is with changed=false. With strictPatch set,
// fields present in both schemas are validated first and an incompatible
// patch yields an error. The merge works on a copy of existing.
func mergeSchemas(
        existing *bqtypes.TableSchema,
        load *bqtypes.TableSchema,
        opts []string,
        strictPatch bool,
) (*bqtypes.TableSchema, bool, error) {
        if existing == nil {
                existing = &bqtypes.TableSchema{}
        }
        canAdd := slices.Contains(opts, schemaUpdateAllowFieldAddition)
        canRelax := slices.Contains(opts, schemaUpdateAllowFieldRelaxation)
        if load == nil || !(canAdd || canRelax) {
                return existing, false, nil
        }

        if strictPatch {
                if err := validatePatchAgainstExisting(existing, load); err != nil {
                        return nil, false, err
                }
        }

        result := cloneBQSchema(existing)
        // Both passes always run; additions first so that relaxation and
        // description updates also see the newly appended fields.
        added := applySchemaFieldAdditions(result, load, canAdd)
        adjusted := applySchemaRelaxationAndDescriptions(result, load, canRelax)
        return result, added || adjusted, nil
}

// validatePatchAgainstExisting checks every incoming field that already
// exists (matched by name) for compatibility with its current
// definition and returns the first incompatibility found. Fields that
// are new to the schema are not validated here.
func validatePatchAgainstExisting(existing *bqtypes.TableSchema, load *bqtypes.TableSchema) error {
        for i := range load.Fields {
                incoming := load.Fields[i]
                at := fieldIndex(existing.Fields, incoming.Name)
                if at >= 0 {
                        if err := validatePatchFieldCompatibility(existing.Fields[at], incoming); err != nil {
                                return err
                        }
                }
        }
        return nil
}

// applySchemaFieldAdditions appends to merged every field of load whose
// name is not already present, and reports whether any field was added.
// Added fields never stay REQUIRED: that mode is cleared on the copy.
// It is a no-op when allowAdd is false.
func applySchemaFieldAdditions(merged *bqtypes.TableSchema, load *bqtypes.TableSchema, allowAdd bool) bool {
        added := false
        if !allowAdd {
                return added
        }
        for i := range load.Fields {
                if fieldIndex(merged.Fields, load.Fields[i].Name) < 0 {
                        candidate := load.Fields[i] // copy, so load is not modified
                        if candidate.Mode == fieldModeRequired {
                                candidate.Mode = ""
                        }
                        merged.Fields = append(merged.Fields, candidate)
                        added = true
                }
        }
        return added
}

// applySchemaRelaxationAndDescriptions updates the fields of merged in
// place and reports whether anything changed. Two adjustments are made
// per field:
//
//   - with allowRelax, a REQUIRED field has its mode cleared unless the
//     incoming schema still lists it as REQUIRED (a field absent from
//     load is relaxed too);
//   - a non-empty description on the matching incoming field replaces a
//     differing description, regardless of allowRelax.
func applySchemaRelaxationAndDescriptions(
        merged *bqtypes.TableSchema,
        load *bqtypes.TableSchema,
        allowRelax bool,
) bool {
        touched := false
        for i := range merged.Fields {
                field := &merged.Fields[i]
                if allowRelax && field.Mode == fieldModeRequired && !loadKeepsFieldRequired(load, field.Name) {
                        field.Mode = ""
                        touched = true
                }
                if at := fieldIndex(load.Fields, field.Name); at >= 0 {
                        incoming := load.Fields[at].Description
                        if incoming != "" && incoming != field.Description {
                                field.Description = incoming
                                touched = true
                        }
                }
        }
        return touched
}

// validatePatchFieldCompatibility rejects a patch that changes the type
// of an existing field, or that turns a field which is not REQUIRED into
// a REQUIRED one. The type check runs first, so a patch that violates
// both rules reports the type error.
func validatePatchFieldCompatibility(
        existing, patch bqtypes.TableFieldSchema,
) error {
        if !fieldTypesCompatible(existing.Type, patch.Type) {
                return fmt.Errorf(
                        "schema update: cannot change type of field %q from %s to %s",
                        existing.Name, existing.Type, patch.Type,
                )
        }
        current := normalizeFieldMode(existing.Mode)
        wantsRequired := normalizeFieldMode(patch.Mode) == fieldModeRequired
        if wantsRequired && current != fieldModeRequired {
                return fmt.Errorf(
                        "schema update: cannot change mode of field %q from %s to REQUIRED",
                        existing.Name, modeLabel(current),
                )
        }
        return nil
}

// fieldTypesCompatible reports whether two type names denote the same
// column type. Names are compared after trimming surrounding whitespace
// and upper-casing, and the REST / engine spellings of the integer,
// float and boolean types are treated as interchangeable.
func fieldTypesCompatible(existingType, patchType string) bool {
        have := strings.ToUpper(strings.TrimSpace(existingType))
        want := strings.ToUpper(strings.TrimSpace(patchType))
        if have == want {
                return true
        }
        // Each pair lists two spellings of one type (e.g. REST INTEGER vs
        // engine INT64 round-trip).
        aliases := [][2]string{
                {fieldTypeInt64, fieldTypeInteger},
                {fieldTypeFloat64, fieldTypeFloat},
                {fieldTypeBool, fieldTypeBoolean},
        }
        for _, pair := range aliases {
                haveIn := have == pair[0] || have == pair[1]
                wantIn := want == pair[0] || want == pair[1]
                if haveIn && wantIn {
                        return true
                }
        }
        return false
}

// normalizeFieldMode canonicalizes a field mode for comparison: the
// empty string and any casing of "NULLABLE" become "", every other
// value is upper-cased.
func normalizeFieldMode(mode string) string {
        switch {
        case mode == "", strings.EqualFold(mode, "NULLABLE"):
                return ""
        default:
                return strings.ToUpper(mode)
        }
}

// modeLabel renders a normalized mode for error messages, spelling the
// empty mode as "NULLABLE".
func modeLabel(mode string) string {
        if mode != "" {
                return mode
        }
        return "NULLABLE"
}

// loadKeepsFieldRequired reports whether the incoming schema contains a
// field called name whose mode is REQUIRED. A nil schema or a missing
// field counts as "not required": query/load results default to
// NULLABLE, so REQUIRED is only kept when the incoming schema still
// asks for it.
func loadKeepsFieldRequired(load *bqtypes.TableSchema, name string) bool {
        if load == nil {
                return false
        }
        if at := fieldIndex(load.Fields, name); at >= 0 {
                return load.Fields[at].Mode == fieldModeRequired
        }
        return false
}

// cloneBQSchema returns a new TableSchema whose Fields slice is a copy
// of s.Fields, so appending to or overwriting top-level fields of the
// clone leaves s untouched. The copy is shallow: field values are copied
// by assignment. A nil s yields an empty schema.
func cloneBQSchema(s *bqtypes.TableSchema) *bqtypes.TableSchema {
        clone := &bqtypes.TableSchema{}
        if s != nil {
                clone.Fields = append(make([]bqtypes.TableFieldSchema, 0, len(s.Fields)), s.Fields...)
        }
        return clone
}

// fieldIndex returns the position of the first field whose Name equals
// name exactly (case-sensitive), or -1 when there is none.
func fieldIndex(fields []bqtypes.TableFieldSchema, name string) int {
        return slices.IndexFunc(fields, func(f bqtypes.TableFieldSchema) bool {
                return f.Name == name
        })
}

// schemaFromProto converts an engine catalog schema into its REST
// representation. Only each field's name, type, mode and description
// are carried over. A nil input yields nil.
func schemaFromProto(s *enginepb.TableSchema) *bqtypes.TableSchema {
        if s == nil {
                return nil
        }
        converted := make([]bqtypes.TableFieldSchema, len(s.Fields))
        for i, pf := range s.Fields {
                converted[i] = bqtypes.TableFieldSchema{
                        Name:        pf.GetName(),
                        Type:        pf.GetType(),
                        Mode:        pf.GetMode(),
                        Description: pf.GetDescription(),
                }
        }
        return &bqtypes.TableSchema{Fields: converted}
}

// listAllRows reads a table through the catalog in pages of up to 10000
// rows and returns every row as a column-name → value map keyed by the
// given schema. Paging stops at the first empty page or as soon as the
// running offset reaches the total row count reported by the response.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient,
        tableRef *enginepb.TableRef, schema *enginepb.TableSchema,
) ([]map[string]any, error) {
        const pageSize = 10000
        var collected []map[string]any
        for offset := int64(0); ; {
                resp, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
                        Table:      tableRef,
                        StartIndex: offset,
                        MaxResults: pageSize,
                })
                if err != nil {
                        return nil, fmt.Errorf("list rows for schema update: %w", err)
                }
                batch := resp.GetRows()
                if len(batch) == 0 {
                        return collected, nil
                }
                for _, r := range batch {
                        collected = append(collected, protoRowToMap(schema, r))
                }
                offset += int64(len(batch))
                if offset >= resp.GetTotalRows() {
                        return collected, nil
                }
        }
}

// protoRowToMap pairs the cells of row with the schema fields by
// position and returns them keyed by field name. Fields that have no
// corresponding cell (the row is shorter than the schema) map to nil.
func protoRowToMap(schema *enginepb.TableSchema, row *enginepb.DataRow) map[string]any {
        columns := schema.GetFields()
        cells := row.GetCells()
        result := make(map[string]any, len(columns))
        for idx, column := range columns {
                var value any
                if idx < len(cells) {
                        value = protoCellToAny(cells[idx])
                }
                result[column.GetName()] = value
        }
        return result
}

// protoCellToAny extracts the value of a cell as its string form.
// It returns nil for a nil cell, a cell flagged as null, or a cell that
// has neither a non-empty string value nor any value set; otherwise it
// returns the cell's string value (which may be empty).
func protoCellToAny(c *enginepb.Cell) any {
        if c == nil || c.GetNullValue() {
                return nil
        }
        text := c.GetStringValue()
        if text == "" && c.GetValue() == nil {
                return nil
        }
        return text
}

package load

import (
        "bytes"
        "crypto/rand"
        "encoding/hex"
        "errors"
        "fmt"
        "io"
        "mime"
        "mime/multipart"
        "net/http"
        "strconv"
        "strings"
        "sync"
        "time"
)

// UploadSession tracks a resumable jobs.insert media upload.
type UploadSession struct {
        // ProjectID is the project identifier passed to CreateSession.
        ProjectID string
        // Metadata is a private copy of the metadata bytes passed to
        // CreateSession.
        Metadata  []byte
        // Data accumulates the media bytes written by AppendBytes.
        Data      []byte
        // Total is the total size passed to CreateSession.
        // NOTE(review): the sentinel used for "unknown" is not visible here — confirm with the caller.
        Total     int64
        // Expires is the instant after which the session is treated as gone;
        // CreateSession sets it 24 hours after creation.
        Expires   time.Time
}

// UploadStore holds in-process resumable upload sessions.
// Use NewUploadStore to create one: the sessions map must be initialized
// before sessions can be registered.
type UploadStore struct {
        // mu guards sessions; every method locks it before touching the map.
        mu       sync.Mutex
        // sessions maps an upload id to its session.
        sessions map[string]*UploadSession
}

// NewUploadStore returns an empty upload session table that is ready
// for use.
func NewUploadStore() *UploadStore {
        store := new(UploadStore)
        store.sessions = make(map[string]*UploadSession)
        return store
}

// defaultUploadStore is the single store handed out by DefaultUploadStore;
// it is created once at package initialization.
var defaultUploadStore = NewUploadStore()

// DefaultUploadStore is the process-local resumable upload session table.
// Every call returns the same *UploadStore.
func DefaultUploadStore() *UploadStore { return defaultUploadStore }

// CreateSession registers a new resumable upload session under a freshly
// generated id and returns that id. The metadata bytes are copied, so
// the caller may reuse its slice. The session expires 24 hours after
// creation.
func (s *UploadStore) CreateSession(projectID string, metadata []byte, total int64) string {
        id := newUploadID()
        session := &UploadSession{
                ProjectID: projectID,
                Metadata:  append([]byte(nil), metadata...),
                Total:     total,
                Expires:   time.Now().UTC().Add(24 * time.Hour),
        }

        s.mu.Lock()
        defer s.mu.Unlock()
        s.sessions[id] = session
        return id
}

// Get returns the session registered under id, or nil when the id is
// unknown or the session has expired. An expired session is removed from
// the store as a side effect. The returned pointer is the stored
// session itself, not a copy.
func (s *UploadStore) Get(id string) *UploadSession {
        s.mu.Lock()
        defer s.mu.Unlock()
        if sess, ok := s.sessions[id]; ok && !time.Now().UTC().After(sess.Expires) {
                return sess
        }
        delete(s.sessions, id)
        return nil
}

// Delete removes the session registered under id, e.g. once an upload
// has completed or been abandoned. Deleting an unknown id is a no-op.
func (s *UploadStore) Delete(id string) {
        s.mu.Lock()
        delete(s.sessions, id)
        s.mu.Unlock()
}

// AppendBytes stores chunk at byte offset start of the session's data,
// growing the buffer as needed. Bytes already present in the written
// range are overwritten, so re-sending a chunk is harmless.
//
// start must lie within [0, bytes received so far]: a negative offset,
// or one that would leave a gap after the data already received, is
// rejected with an error and the session is left unchanged. This keeps a
// malformed or hostile Content-Range from panicking the slice expression
// or forcing an arbitrarily large allocation, and guarantees that
// ReceivedBytes never counts zero-filled holes.
//
// An error is also returned when the session is unknown or expired; an
// expired session is removed from the store.
func (s *UploadStore) AppendBytes(id string, chunk []byte, start int64) error {
        s.mu.Lock()
        defer s.mu.Unlock()
        sess, ok := s.sessions[id]
        if !ok || time.Now().UTC().After(sess.Expires) {
                delete(s.sessions, id)
                return errors.New("upload session not found")
        }
        // start <= len(sess.Data) also guarantees the int conversion below
        // cannot truncate.
        if start < 0 || start > int64(len(sess.Data)) {
                return errors.New("upload chunk offset out of range")
        }
        need := int(start) + len(chunk)
        if need > len(sess.Data) {
                grown := make([]byte, need)
                copy(grown, sess.Data)
                sess.Data = grown
        }
        copy(sess.Data[start:], chunk)
        return nil
}

// ReceivedBytes returns the length of the data buffered so far for the
// session, or 0 when the id is unknown. Session expiry is not checked
// here.
func (s *UploadStore) ReceivedBytes(id string) int64 {
        s.mu.Lock()
        defer s.mu.Unlock()
        if sess, ok := s.sessions[id]; ok {
                return int64(len(sess.Data))
        }
        return 0
}

// ParseMultipartJob splits a multipart jobs.insert upload body into its
// metadata JSON (first part) and media bytes (second part). contentType
// is the request's Content-Type header and must be a multipart/* type
// carrying a boundary parameter.
//
// All parts are read, but parts after the second are ignored. media is
// nil when the body has only one part. An error is returned when the
// header or body cannot be parsed, or when the metadata part is missing
// or empty.
func ParseMultipartJob(body []byte, contentType string) (metadata, media []byte, err error) {
        mediaType, params, err := mime.ParseMediaType(contentType)
        switch {
        case err != nil:
                return nil, nil, fmt.Errorf("parse Content-Type: %w", err)
        case !strings.HasPrefix(mediaType, "multipart/"):
                return nil, nil, fmt.Errorf("expected multipart Content-Type, got %q", mediaType)
        case params["boundary"] == "":
                return nil, nil, errors.New("multipart boundary missing")
        }

        mr := multipart.NewReader(bytes.NewReader(body), params["boundary"])
        for index := 0; ; index++ {
                part, nextErr := mr.NextPart()
                if nextErr == io.EOF {
                        break
                }
                if nextErr != nil {
                        return nil, nil, fmt.Errorf("read multipart part: %w", nextErr)
                }
                payload, readErr := io.ReadAll(part)
                if readErr != nil {
                        return nil, nil, fmt.Errorf("read multipart body: %w", readErr)
                }
                if index == 0 {
                        metadata = payload
                } else if index == 1 {
                        media = payload
                }
        }

        if len(metadata) == 0 {
                return nil, nil, errors.New("multipart upload missing metadata part")
        }
        return metadata, media, nil
}

// ParseContentRange parses a Content-Range header of the form
// "bytes start-end/total". Surrounding whitespace is ignored.
//
// Either side may be "*": "bytes */total" and "bytes */*" yield start
// and end of 0, and a "*" total is reported as -1. ok is false (with all
// numbers zero) when the header does not start with "bytes ", does not
// contain exactly one "/" and one "-" in the expected places, or holds a
// value that is not a base-10 integer.
func ParseContentRange(header string) (start, end, total int64, ok bool) {
        spec, found := strings.CutPrefix(strings.TrimSpace(header), "bytes ")
        if !found {
                return 0, 0, 0, false
        }
        span, size, found := strings.Cut(spec, "/")
        if !found || strings.Contains(size, "/") {
                return 0, 0, 0, false
        }

        // Resolve the total first; every failure returns the same zero
        // tuple, so the order of the checks is not observable.
        total = -1
        if size != "*" {
                n, err := strconv.ParseInt(size, 10, 64)
                if err != nil {
                        return 0, 0, 0, false
                }
                total = n
        }
        if span == "*" {
                return 0, 0, total, true
        }

        first, last, found := strings.Cut(span, "-")
        if !found || strings.Contains(last, "-") {
                return 0, 0, 0, false
        }
        start, err := strconv.ParseInt(first, 10, 64)
        if err != nil {
                return 0, 0, 0, false
        }
        end, err = strconv.ParseInt(last, 10, 64)
        if err != nil {
                return 0, 0, 0, false
        }
        return start, end, total, true
}

// WriteResumeIncomplete answers a partial resumable upload with HTTP 308
// (Resume Incomplete per api-uploads.md). When at least one byte has
// been received, the Range header reports the stored span as
// "0-<last byte index>"; with nothing received the header is omitted.
func WriteResumeIncomplete(w http.ResponseWriter, received int64) {
        const resumeIncomplete = 308
        if received >= 1 {
                lastByte := received - 1
                w.Header().Set("Range", fmt.Sprintf("0-%d", lastByte))
        }
        w.WriteHeader(resumeIncomplete)
}

// newUploadID returns a random upload session id: 16 bytes from
// crypto/rand rendered as 32 lowercase hex characters. The error from
// rand.Read is deliberately ignored.
func newUploadID() string {
        raw := make([]byte, 16)
        _, _ = rand.Read(raw)
        return hex.EncodeToString(raw)
}

// SessionLocation builds the relative URI (path plus query) of a
// resumable upload session. projectID and uploadID are inserted
// verbatim, without URL escaping.
func SessionLocation(projectID, uploadID string) string {
        const pattern = "/upload/bigquery/v2/projects/%s/jobs?uploadType=resumable&upload_id=%s"
        return fmt.Sprintf(pattern, projectID, uploadID)
}

// AbsoluteSessionLocation builds a fully-qualified Location header value
// by prefixing the session path with baseURL, after trimming surrounding
// whitespace and trailing slashes from baseURL. A blank baseURL yields
// the relative path unchanged.
//
// Python/Node resumable upload clients pass the Location URL directly to
// requests/teeny-request and require a scheme (relative paths → MissingSchema).
func AbsoluteSessionLocation(baseURL, projectID, uploadID string) string {
        location := SessionLocation(projectID, uploadID)
        if prefix := strings.TrimRight(strings.TrimSpace(baseURL), "/"); prefix != "" {
                return prefix + location
        }
        return location
}

// Package middleware contains HTTP middleware for the BigQuery emulator
// gateway. The middleware here is intentionally permissive: the emulator
// follows cloud-spanner-emulator's posture and parses but does not
// validate authentication credentials.
//
// See docs/REST_API.md ("Authentication posture") and the
// gateway-HTTP-surface section of ROADMAP.md for the rationale:
// clients reuse their real BigQuery code paths by
// pointing at the emulator, and that code unconditionally sends a bearer
// token. Rejecting those tokens would force every client to special-case
// the emulator, which we explicitly want to avoid.
package middleware

import (
        "context"
        "net/http"
        "strings"
)

// principalCtxKey is the unexported context-key type used to stash the
// synthetic [Principal] on each request. Following the standard library
// guidance, we use a private named type so callers cannot collide with
// our key by accident. [WithAuth] stores the principal under the value
// principalCtxKey{} and [PrincipalFromContext] reads it back with the
// same key.
type principalCtxKey struct{}

// Principal is the synthetic identity the emulator attributes to every
// request. The fields are populated by [WithAuth]; consumers retrieve
// the value via [PrincipalFromContext].
//
// The emulator is deliberately credulous: it does not validate the
// bearer token, look up an account, or check IAM. Handlers that need
// to differentiate authenticated vs anonymous traffic should consult
// [Principal.Anonymous].
type Principal struct {
        // Email is the synthetic account email attributed to the caller.
        // It is the same value for every request and exists only so logs
        // and (eventually) audit trails have a stable subject string.
        Email string

        // Bearer is the credential the client presented in the
        // Authorization header. For a "Bearer <token>" header (scheme
        // matched case-insensitively) it is the token with the scheme
        // prefix and surrounding whitespace stripped; for any other
        // non-blank header it is the whole trimmed header value. Empty
        // when the Authorization header is absent or blank.
        Bearer string

        // Anonymous is true when the request's Authorization header is
        // absent or contains only whitespace. A request with a malformed
        // header is still considered non-anonymous: the emulator only
        // cares whether the client tried, not whether the credential is
        // well-formed.
        Anonymous bool
}

// defaultPrincipalEmail is the synthetic email used for every request;
// [WithAuth] assigns it to [Principal.Email] unconditionally.
// It mirrors cloud-spanner-emulator, which similarly attributes all
// traffic to a fixed local identity.
const defaultPrincipalEmail = "emulator@bigquery.local"

// WithAuth returns middleware that parses the Authorization header (if
// present) and attaches a [Principal] to the request context before
// calling next. It never short-circuits the response: every request is
// allowed through with a synthetic identity, matching
// cloud-spanner-emulator's posture and the emulator's documented
// behavior in docs/REST_API.md.
//
// Any non-blank Authorization header marks the request as
// non-anonymous. A "Bearer <token>" header (scheme matched
// case-insensitively) stores the trimmed token in [Principal.Bearer];
// every other header value is stored verbatim. The header is never
// logged.
func WithAuth(next http.Handler) http.Handler {
        handle := func(w http.ResponseWriter, r *http.Request) {
                header := strings.TrimSpace(r.Header.Get("Authorization"))
                principal := Principal{
                        Email:     defaultPrincipalEmail,
                        Anonymous: header == "",
                }
                if !principal.Anonymous {
                        // Default to the raw header; narrow it to the token
                        // only for a well-formed Bearer credential.
                        principal.Bearer = header
                        scheme, token, found := strings.Cut(header, " ")
                        if found && strings.EqualFold(scheme, "Bearer") {
                                principal.Bearer = strings.TrimSpace(token)
                        }
                }
                ctx := context.WithValue(r.Context(), principalCtxKey{}, principal)
                next.ServeHTTP(w, r.WithContext(ctx))
        }
        return http.HandlerFunc(handle)
}

// PrincipalFromContext extracts the [Principal] previously attached by
// [WithAuth]. When the context carries no principal it returns the zero
// Principal and false, which should only happen on requests that bypass
// the middleware (such as direct calls in tests).
func PrincipalFromContext(ctx context.Context) (Principal, bool) {
        if principal, ok := ctx.Value(principalCtxKey{}).(Principal); ok {
                return principal, true
        }
        return Principal{}, false
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package middleware

import (
        "encoding/json"
        "net/http"
)

// jsonContentType is the Content-Type header value middleware emits on
// JSON error envelopes. Pinned here so a future tweak to the value lands
// in exactly one place.
const jsonContentType = "application/json; charset=utf-8"

// errReasonInvalid is the BigQuery-shaped error `status` / `errors[].reason`
// value used for 400 responses the gateway-layer middlewares emit
// (gunzip body invalid, method override misuse). The handlers package
// uses the same literal in `handlers.writeError` calls; we keep a
// local copy here because middleware -> handlers is a forbidden import
// direction (handlers depends on middleware for auth context).
const errReasonInvalid = "invalid"

// fieldKeyMessage is the JSON field key under which [writeJSONError]
// stores the human-readable message, both at the top level of the
// BigQuery error envelope (`error.message`) and inside each
// `error.errors[]` entry. Kept as a named const so the recurring use
// stays goconst-clean.
const fieldKeyMessage = "message"

// writeJSONError writes a BigQuery-shaped JSON error envelope: it sets
// the JSON Content-Type, sends `status` as the HTTP status code, and
// encodes a body whose `error` object carries the code, `msg`, `reason`
// (as `status`) and a single-entry `errors` list in the "global" domain.
// An encoding failure is ignored, since the status line has already
// been written by then.
//
// Mirrors `handlers.writeError` byte-for-byte but keeps middleware free
// of a handlers import (which would close an import cycle — handlers
// depends on middleware for the auth-context lookup).
func writeJSONError(w http.ResponseWriter, status int, reason, msg string) {
        detail := map[string]any{
                "reason":        reason,
                fieldKeyMessage: msg,
                "domain":        "global",
        }
        envelope := map[string]any{
                "error": map[string]any{
                        "code":          status,
                        fieldKeyMessage: msg,
                        "status":        reason,
                        "errors":        []map[string]any{detail},
                },
        }

        header := w.Header()
        header.Set("Content-Type", jsonContentType)
        w.WriteHeader(status)
        _ = json.NewEncoder(w).Encode(envelope)
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package middleware

import (
        "compress/gzip"
        "io"
        "net/http"
        "strings"
)

// WithGunzipRequestBody returns a handler that transparently inflates
// request bodies sent with `Content-Encoding: gzip` before passing the
// request to next. The Java BigQuery client gzips JSON POST bodies by
// default; without this layer handlers would see raw gzip framing and
// fail with `invalid character '\x1f'`.
//
// Behavior:
//   - No body, or a Content-Encoding that does not mention gzip: the
//     request is forwarded untouched.
//   - The gzip header cannot be read: a BigQuery-shaped 400 is written
//     and next is not called.
//   - Otherwise the body is replaced with an inflating reader, and the
//     Content-Encoding / Content-Length headers are removed so downstream
//     code neither decodes twice nor trusts the compressed length.
func WithGunzipRequestBody(next http.Handler) http.Handler {
        inflate := func(w http.ResponseWriter, r *http.Request) {
                hasBody := r != nil && r.Body != nil && r.Body != http.NoBody
                if !hasBody {
                        next.ServeHTTP(w, r)
                        return
                }

                encoding := strings.ToLower(strings.TrimSpace(r.Header.Get("Content-Encoding")))
                if !strings.Contains(encoding, "gzip") {
                        next.ServeHTTP(w, r)
                        return
                }

                inflater, err := gzip.NewReader(r.Body)
                if err != nil {
                        writeGunzipError(w, "invalid gzip request body: "+err.Error())
                        return
                }

                // gzip.Reader.Close leaves the wrapped reader open, so the
                // replacement body closes both the inflater and the original
                // body to keep the http server's cleanup working.
                r.Body = &gzipRequestBody{gzip: inflater, underlying: r.Body}

                // The inflated stream has a different (unknown) length and is
                // no longer encoded; make the request metadata say so.
                r.Header.Del("Content-Encoding")
                r.Header.Del("Content-Length")
                r.ContentLength = -1

                next.ServeHTTP(w, r)
        }
        return http.HandlerFunc(inflate)
}

// gzipRequestBody is the io.ReadCloser installed as the request body by
// WithGunzipRequestBody. Reads come from the gzip stream; Close releases
// both the gzip reader and the original body it wraps.
type gzipRequestBody struct {
        gzip       *gzip.Reader
        underlying io.ReadCloser
}

// Read returns decompressed bytes from the gzip stream.
func (b *gzipRequestBody) Read(p []byte) (int, error) {
        n, err := b.gzip.Read(p)
        return n, err
}

// Close closes the gzip reader first and then the original body. Only
// the original body's close error is reported; the gzip reader's close
// result is intentionally discarded.
func (b *gzipRequestBody) Close() error {
        _ = b.gzip.Close()
        closeErr := b.underlying.Close()
        return closeErr
}

// writeGunzipError writes a BigQuery-shaped 400 response carrying msg.
// It is a thin wrapper over [writeJSONError] so the gunzip middleware
// shares one envelope layout (and one copy of the "invalid" reason
// literal) with the other middlewares.
func writeGunzipError(w http.ResponseWriter, msg string) {
        const status = http.StatusBadRequest
        writeJSONError(w, status, errReasonInvalid, msg)
}

package middleware

import (
        "context"
        "net"
        "net/http"
)

// loopbackCtxKey is the unexported context-key type used to stash the
// per-request loopback flag [WithLoopbackTag] computes and [IsLoopback]
// reads back. Following the standard library guidance, we use a
// private named type so callers cannot collide with our key by
// accident. The value stored under this key is a bool.
type loopbackCtxKey struct{}

// WithLoopbackTag returns middleware that records whether the request
// arrived from a loopback caller (an HTTP client bound to `127.0.0.0/8`
// or `::1`, or a unix-socket connection where `RemoteAddr` is empty).
// The flag is stashed in the request context so handlers can decide
// whether to surface emulator-internal debug fields back to the caller
// without having to re-parse `r.RemoteAddr`.
//
// The single user today is the synchronous query handler, which uses
// the flag to gate `Job.statistics.query.emulatorRoute` (the canonical
// `Disposition` string the C++ coordinator's `RouteClassifier`
// produced for the query). The contract is that field is observable
// ONLY to loopback callers; non-loopback callers receive a response
// with the field omitted entirely, matching the public BigQuery REST
// surface byte-for-byte. See
// `docs/ENGINE_POLICY.md` for the wider
// rationale.
//
// The middleware never short-circuits the response: it only attaches
// a boolean to the context. Handlers that want loopback-only behavior
// call [IsLoopback]; handlers that don't care are unaffected.
func WithLoopbackTag(next http.Handler) http.Handler {
        return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
                ctx := context.WithValue(r.Context(), loopbackCtxKey{},
                        isLoopbackRemoteAddr(r.RemoteAddr))
                next.ServeHTTP(w, r.WithContext(ctx))
        })
}

// IsLoopback reports whether the request owning ctx was tagged as
// coming from a loopback caller by [WithLoopbackTag].
//
// A context without the flag (the request never passed through the
// middleware, which should only happen in direct unit tests) yields
// false: treating an untagged request as non-loopback is the safer
// default for production code. Tests that want the opposite default
// can adjust at the call site.
func IsLoopback(ctx context.Context) bool {
        flag, tagged := ctx.Value(loopbackCtxKey{}).(bool)
        return tagged && flag
}

// isLoopbackRemoteAddr classifies an `http.Request.RemoteAddr` value.
//
//   - The empty string counts as loopback. A unix-listener backed
//     gateway leaves `RemoteAddr` empty, and that is the only realistic
//     deployment shape that produces an empty address.
//   - A `host:port` string is loopback when host parses as an IP literal
//     and that IP is a loopback address.
//   - Anything else (no port, a hostname instead of an IP literal, other
//     malformed input) is non-loopback, so an upstream proxy bug that
//     mangles the address cannot promote a public caller to loopback.
func isLoopbackRemoteAddr(addr string) bool {
        if addr == "" {
                return true
        }
        host, _, splitErr := net.SplitHostPort(addr)
        if splitErr != nil {
                // Not in the `host:port` form net/http documents for
                // RemoteAddr; fail closed.
                return false
        }
        if parsed := net.ParseIP(host); parsed != nil {
                return parsed.IsLoopback()
        }
        return false
}

// Copyright 2026 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package middleware

import (
        "net/http"
        "strings"
)

// methodOverrideHeader is the canonical header name the
// google-api-client `MethodOverride` interceptor sets when its
// underlying transport reports that a method (typically PATCH) is
// unsupported. See `com.google.api.client.googleapis.MethodOverride`
// in google-api-client-2.x and `NetHttpTransport.SUPPORTED_METHODS`
// in google-http-client (PATCH is intentionally omitted from that
// list, so PATCH-shaped REST calls travel as POST + this header).
// [WithMethodOverride] reads it with `r.Header.Get`.
const methodOverrideHeader = "X-HTTP-Method-Override"

// WithMethodOverride returns middleware that honors the
// `X-HTTP-Method-Override` request header, letting clients whose
// transport cannot send PATCH (notably the Java google-api-client on
// java.net.HttpURLConnection) reach PATCH/PUT/DELETE handlers through a
// tunneled POST.
//
// Background: the Java BigQuery client's default `NetHttpTransport`
// supports GET, HEAD, OPTIONS, POST, PUT, DELETE and TRACE, but not
// PATCH. The `com.google.api.client.googleapis.MethodOverride`
// interceptor rewrites unsupported methods to POST and sets
// `X-HTTP-Method-Override: <originalMethod>`. The gateway's mux mounts
// dataset/table/job updates at PATCH and PUT, so without this rewrite a
// logical PATCH from Java (e.g. AuthorizeDatasetIT) lands on
// `DatasetCustomMethodPOST` and gets a 405. Handling the header once at
// the middleware layer fixes the whole gateway surface instead of
// teaching every `*CustomMethodPOST` handler to accept update bodies.
//
// Rules applied per request:
//   - Header absent: forwarded unchanged.
//   - Header present on a non-POST request: 400. The Java interceptor
//     only sets the header on a POST it just rewrote, so anything else
//     is a misconfigured client or an attempt to confuse dispatch.
//   - Header present on a POST with a value of PATCH, PUT or DELETE
//     (case-insensitive, surrounding whitespace ignored): `r.Method` is
//     rewritten to the uppercase value and the request is forwarded.
//   - Header present on a POST with any other value: 400. GET, HEAD and
//     OPTIONS are deliberately not honored to keep the surface tight.
//
// Mounting: install it after the access-log middleware (so the log
// records the original POST + header pair) and before anything that
// routes on `r.Method`. In `gateway/server.go::wrapMiddleware` it sits
// between the loopback-tag middleware and the request-log layer; see
// that function for the canonical chain order.
func WithMethodOverride(next http.Handler) http.Handler {
        return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
                headerValue := r.Header.Get(methodOverrideHeader)
                switch {
                case headerValue == "":
                        // Common case: nothing to rewrite.
                case r.Method != http.MethodPost:
                        writeMethodOverrideError(w,
                                "X-HTTP-Method-Override is only valid on POST requests; "+
                                        "received "+r.Method)
                        return
                default:
                        target := strings.ToUpper(strings.TrimSpace(headerValue))
                        allowed := target == http.MethodPatch ||
                                target == http.MethodPut ||
                                target == http.MethodDelete
                        if !allowed {
                                // Echo the header exactly as received so the
                                // client can see what it sent.
                                writeMethodOverrideError(w,
                                        "X-HTTP-Method-Override must be one of PATCH, PUT, "+
                                                "or DELETE; received "+headerValue)
                                return
                        }
                        r.Method = target
                }
                next.ServeHTTP(w, r)
        })
}

// writeMethodOverrideError writes a BigQuery-shaped 400 response
// carrying msg. It forwards to [writeJSONError] so the method-override
// and gunzip middlewares emit the same envelope and share a single copy
// of the `"invalid"` / `"message"` literals (which also keeps goconst
// quiet).
func writeMethodOverrideError(w http.ResponseWriter, msg string) {
        const status = http.StatusBadRequest
        writeJSONError(w, status, errReasonInvalid, msg)
}

package models

import (
        "regexp"
        "strconv"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// modelTypeRE extracts the value of a `model_type = '...'` option from
// CREATE MODEL DDL. The match is case-insensitive and tolerates
// whitespace around `=`; capture group 1 is the text between the single
// quotes.
var modelTypeRE = regexp.MustCompile(`(?i)model_type\s*=\s*'([^']+)'`)

// defaultModelType is the model type recorded when the DDL contains no
// `model_type` option that modelTypeRE matches.
const defaultModelType = "LINEAR_REG"

// RegisterFromDDL parses a CREATE MODEL statement and upserts the
// resulting metadata into store.
//
// projectID and defaultDatasetID fill in whichever parts the model name
// in the DDL omits. The return value is a pointer to a copy of the
// registered model's reference, or nil when sql is not a CREATE MODEL
// statement this package can parse (in which case store is untouched).
func RegisterFromDDL(store *Store, projectID, defaultDatasetID, sql string) *bqtypes.ModelReference {
        model, parsed := parseCreateModelDDL(projectID, defaultDatasetID, sql)
        if !parsed {
                return nil
        }
        store.Upsert(model)
        // Hand back a copy so the caller does not alias the local model value.
        registered := model.ModelReference
        return &registered
}

// parseCreateModelDDL builds model metadata from a CREATE MODEL
// statement. It reports false when sql does not start with one of the
// recognized CREATE MODEL forms or when no model name follows it.
//
// The model type comes from a `model_type = '...'` option anywhere in
// sql (upper-cased), falling back to defaultModelType. Creation and
// last-modified times are both set to the current time and a fresh etag
// is minted.
func parseCreateModelDDL(projectID, defaultDatasetID, sql string) (bqtypes.Model, bool) {
        stmt := strings.TrimSpace(sql)
        upperStmt := strings.ToUpper(stmt)

        // Longer forms come first: the plain "CREATE MODEL" prefix would
        // otherwise also match "CREATE MODEL IF NOT EXISTS".
        keywords := []string{
                "CREATE OR REPLACE MODEL",
                "CREATE MODEL IF NOT EXISTS",
                "CREATE MODEL",
        }
        afterKeyword, recognized := "", false
        for _, keyword := range keywords {
                if strings.HasPrefix(upperStmt, keyword) {
                        afterKeyword = strings.TrimSpace(stmt[len(keyword):])
                        recognized = true
                        break
                }
        }
        if !recognized {
                return bqtypes.Model{}, false
        }

        name, _, named := parseQuotedOrBareName(afterKeyword)
        if !named {
                return bqtypes.Model{}, false
        }

        var ref bqtypes.ModelReference
        ref.ProjectID, ref.DatasetID, ref.ModelID = splitModelName(projectID, defaultDatasetID, name)

        kind := defaultModelType
        if groups := modelTypeRE.FindStringSubmatch(sql); len(groups) == 2 {
                kind = strings.ToUpper(strings.TrimSpace(groups[1]))
        }

        stamp := nowMillis()
        return bqtypes.Model{
                ModelReference:   ref,
                ModelType:        kind,
                CreationTime:     stamp,
                LastModifiedTime: stamp,
                Etag:             MintEtag(),
        }, true
}

// splitModelName resolves a dotted model path into its project,
// dataset and model IDs.
//
//   - One component: the model ID; project and dataset come from
//     projectID and defaultDatasetID.
//   - Two components: dataset.model; project comes from projectID.
//   - Three or more components: the last two are dataset and model, and
//     everything before them is the project. This keeps project IDs that
//     themselves contain dots (for example domain-scoped
//     `example.com:proj`) intact instead of mis-reading the first
//     fragment as the model ID.
//
// Backticks are stripped from every component, so individually quoted
// paths such as ds.`model` resolve to clean identifiers.
func splitModelName(projectID, defaultDatasetID, name string) (pID, dID, mID string) {
        parts := strings.Split(name, ".")
        for i, part := range parts {
                parts[i] = strings.Trim(part, "`")
        }
        switch n := len(parts); {
        case n >= 3:
                return strings.Join(parts[:n-2], "."), parts[n-2], parts[n-1]
        case n == 2:
                return projectID, parts[0], parts[1]
        default:
                // strings.Split always returns at least one element.
                return projectID, defaultDatasetID, parts[0]
        }
}

// parseQuotedOrBareName reads one identifier from the start of rest
// (after trimming surrounding whitespace) and returns it together with
// the trimmed remainder.
//
// A leading backtick starts a quoted name that runs to the next
// backtick; the backticks themselves are not part of the result. An
// unquoted name runs until the first space, tab, newline, carriage
// return or `(`. ok is false when rest is blank or a quoted name is
// never closed.
func parseQuotedOrBareName(rest string) (name, tail string, ok bool) {
        trimmed := strings.TrimSpace(rest)
        switch {
        case trimmed == "":
                return "", "", false
        case strings.HasPrefix(trimmed, "`"):
                quoted, after, closed := strings.Cut(trimmed[1:], "`")
                if !closed {
                        return "", "", false
                }
                return quoted, strings.TrimSpace(after), true
        }

        stop := strings.IndexAny(trimmed, " \t\n\r(")
        if stop < 0 {
                // The whole input is the name.
                return trimmed, "", true
        }
        return trimmed[:stop], strings.TrimSpace(trimmed[stop:]), true
}

// nowMillis returns the current time as milliseconds since the Unix
// epoch, formatted as a base-10 string.
func nowMillis() string {
        millis := time.Now().UTC().UnixMilli()
        return strconv.FormatInt(millis, 10)
}

// Package models is the gateway-side in-memory registry of BigQuery ML
// model metadata registered by CREATE MODEL DDL stubs. REST handlers
// and query jobs register models here so client libraries can round-trip
// list/get/delete without a trained-model store.
package models

import (
        "crypto/rand"
        "encoding/hex"
        "maps"
        "slices"
        "strings"
        "sync"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// Store holds model metadata keyed by projectId:datasetId.modelId (the
// string modelKey builds). The mu field guards every access to models.
// Methods are nil-receiver safe: calling them on a nil *Store is a
// no-op that returns zero values.
type Store struct {
        mu     sync.RWMutex
        models map[string]bqtypes.Model
}

// NewStore returns an empty, ready-to-use model registry.
func NewStore() *Store {
        registry := &Store{}
        registry.models = make(map[string]bqtypes.Model)
        return registry
}

// modelKey builds the registry key for a model in the form
// `projectID:datasetID.modelID`.
func modelKey(projectID, datasetID, modelID string) string {
        datasetScope := projectID + ":" + datasetID
        return datasetScope + "." + modelID
}

// Upsert stores m under the key derived from its model reference,
// replacing any existing entry. The registry keeps its own snapshot
// (see cloneModel). Calling Upsert on a nil Store does nothing.
func (s *Store) Upsert(m bqtypes.Model) {
        if s == nil {
                return
        }
        key := modelKey(
                m.ModelReference.ProjectID,
                m.ModelReference.DatasetID,
                m.ModelReference.ModelID,
        )

        s.mu.Lock()
        defer s.mu.Unlock()
        snapshot := cloneModel(m)
        s.models[key] = snapshot
}

// Get looks up a model by its project, dataset and model IDs. It
// returns a snapshot of the stored entry and true, or the zero Model and
// false when the entry is absent or the Store is nil.
func (s *Store) Get(projectID, datasetID, modelID string) (bqtypes.Model, bool) {
        if s == nil {
                return bqtypes.Model{}, false
        }
        s.mu.RLock()
        defer s.mu.RUnlock()
        key := modelKey(projectID, datasetID, modelID)
        stored, found := s.models[key]
        return cloneModel(stored), found
}

// Delete removes the model identified by the given IDs and reports
// whether an entry was actually removed. It returns false when the model
// is absent or the Store is nil.
func (s *Store) Delete(projectID, datasetID, modelID string) bool {
        if s == nil {
                return false
        }
        s.mu.Lock()
        defer s.mu.Unlock()

        key := modelKey(projectID, datasetID, modelID)
        _, present := s.models[key]
        if present {
                delete(s.models, key)
        }
        return present
}

// List returns snapshots of every model registered under the given
// project and dataset, sorted by model ID. The optional filter follows
// the BigQuery list-filter syntax, of which only `model_id=<id>` is
// understood today; any other filter is ignored.
//
// The result is a non-nil (possibly empty) slice, except on a nil Store
// where it is nil.
func (s *Store) List(projectID, datasetID, filter string) []bqtypes.Model {
        if s == nil {
                return nil
        }
        wantID := parseModelIDFilter(filter)
        // Every key of this dataset starts with "project:dataset.".
        datasetPrefix := projectID + ":" + datasetID + "."

        s.mu.RLock()
        defer s.mu.RUnlock()

        matched := []bqtypes.Model{}
        for key, model := range s.models {
                inDataset := strings.HasPrefix(key, datasetPrefix)
                idMatches := wantID == "" || model.ModelReference.ModelID == wantID
                if inDataset && idMatches {
                        matched = append(matched, cloneModel(model))
                }
        }

        // Map iteration order is random; sort for a stable listing.
        byModelID := func(a, b bqtypes.Model) int {
                return strings.Compare(a.ModelReference.ModelID, b.ModelReference.ModelID)
        }
        slices.SortFunc(matched, byModelID)
        return matched
}

// parseModelIDFilter extracts the model ID from a `model_id=<id>` list
// filter. Surrounding whitespace on the filter and on the ID is
// ignored. It returns "" for an empty filter or one that does not start
// with `model_id=`.
func parseModelIDFilter(filter string) string {
        id, isModelIDFilter := strings.CutPrefix(strings.TrimSpace(filter), "model_id=")
        if !isModelIDFilter {
                return ""
        }
        return strings.TrimSpace(id)
}

// MintEtag returns a fresh random etag for optimistic concurrency: 8
// bytes from crypto/rand rendered as 16 lowercase hex characters.
func MintEtag() string {
        raw := make([]byte, 8)
        // The error from rand.Read is deliberately ignored.
        _, _ = rand.Read(raw)
        return hex.EncodeToString(raw)
}

// cloneModel returns a copy of m whose Labels map is independent of the
// original, so mutating the labels of a snapshot handed out by (or
// handed to) the Store cannot change the stored entry.
//
// maps.Clone preserves nil-ness: a nil Labels stays nil and an empty
// non-nil map becomes a distinct empty map. The latter matters because
// copying only non-empty maps would leave an empty map shared between
// the snapshot and the store. Other fields are copied by plain struct
// assignment.
func cloneModel(m bqtypes.Model) bqtypes.Model {
        out := m
        out.Labels = maps.Clone(m.Labels)
        return out
}

package query

import (
        "errors"
        "fmt"
        "regexp"
        "strconv"
        "strings"
        "time"
)

// backtickDecoratedRE matches `project.dataset.table@123` or `dataset.table@-3600000`.
// Capture group 1 is the table path before the `@`; group 2 is the
// (optionally negative) integer decorator after it.
var backtickDecoratedRE = regexp.MustCompile("`([^`]+)@(-?[0-9]+)`")

// LowerTableDecorators rewrites BigQuery table time decorators embedded
// in backtick table paths (`path@millis`) into
// "`path` FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(millis)", matching the
// engine's historical read path. Relative offsets (`@-3600000`) are
// resolved against the current UTC clock at rewrite time.
//
// Blank input is returned unchanged. A statement that combines a
// decorator with an explicit FOR SYSTEM_TIME AS OF clause is rejected
// with an error. A decorator whose number cannot be parsed is left in
// the SQL as written.
func LowerTableDecorators(sql string) (string, error) {
        trimmed := strings.TrimSpace(sql)
        switch {
        case trimmed == "":
                return sql, nil
        case hasDecoratorConflict(trimmed):
                return "", errors.New(
                        "cannot use table decorator with FOR SYSTEM_TIME AS OF")
        }

        rewrite := func(decorated string) string {
                groups := backtickDecoratedRE.FindStringSubmatch(decorated)
                if len(groups) != 3 {
                        return decorated
                }
                tablePath, decorator := groups[1], groups[2]
                epochMillis, err := resolveDecoratorEpoch(decorator)
                if err != nil {
                        return decorated
                }
                return fmt.Sprintf("`%s` FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)",
                        tablePath, epochMillis)
        }
        return backtickDecoratedRE.ReplaceAllStringFunc(sql, rewrite), nil
}

// hasDecoratorConflict reports whether sql contains both a
// FOR SYSTEM_TIME AS OF clause (matched case-insensitively) and a table
// decorator, in either the backtick form or the form matched by
// legacyBracketDecoratorRE.
func hasDecoratorConflict(sql string) bool {
        if !strings.Contains(strings.ToUpper(sql), "FOR SYSTEM_TIME AS OF") {
                return false
        }
        if backtickDecoratedRE.MatchString(sql) {
                return true
        }
        return legacyBracketDecoratorRE.MatchString(sql)
}

// resolveDecoratorEpoch converts the numeric part of a table decorator
// into epoch milliseconds. A value starting with "-" is a relative
// offset added to the current UTC time; anything else is parsed as an
// absolute epoch-millisecond value. Parse errors are returned as-is.
func resolveDecoratorEpoch(raw string) (int64, error) {
        if !strings.HasPrefix(raw, "-") {
                return strconv.ParseInt(raw, 10, 64)
        }
        delta, err := strconv.ParseInt(raw, 10, 64)
        if err != nil {
                return 0, err
        }
        nowMs := time.Now().UTC().UnixMilli()
        return nowMs + delta, nil
}

package query

import (
        "context"
        "errors"
        "fmt"
        "regexp"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/load"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
)

// MetadataStore mirrors the handlers.MetadataStore surface needed to
// stash REST-only destination table metadata without an import cycle.
type MetadataStore interface {
        // MergeTable applies patch to the metadata kept for the given
        // table. PersistDestinationMetadata calls it with a patch carrying
        // only Clustering, TimePartitioning and EncryptionConfiguration;
        // how the patch is merged is up to the implementation.
        MergeTable(projectID, datasetID, tableID string, patch bqtypes.Table)
}

// implicitDestDatasetID is the dataset MaterializeImplicitDestination
// falls back to when the caller supplies no default dataset.
const implicitDestDatasetID = "_bqemu_query_results"

// nonIdentRE matches runs of characters outside [a-zA-Z0-9_].
// sanitizeJobTableID replaces each run with a single underscore.
var nonIdentRE = regexp.MustCompile(`[^a-zA-Z0-9_]+`)

// Write dispositions recognized for query destination tables.
const (
        writeTruncate = "WRITE_TRUNCATE"
        writeEmpty    = "WRITE_EMPTY"
        writeAppend   = "WRITE_APPEND"
)

// AppendResultsFromQueryRequest writes jobs.query output into the
// explicit destinationTable carried by a synchronous QueryRequest
// (authViewTutorial, client.query with destination). It is a no-op
// returning nil when req is nil or names no destination table;
// otherwise it delegates to AppendResults with the request's
// destination, write disposition and schema update options.
func AppendResultsFromQueryRequest(ctx context.Context, catalog enginepb.CatalogClient,
        req *bqtypes.QueryRequest, projectID string,
        resultSchema *bqtypes.TableSchema, rows []bqtypes.Row,
) error {
        hasDestination := req != nil &&
                req.DestinationTable != nil &&
                req.DestinationTable.TableID != ""
        if !hasDestination {
                return nil
        }
        queryCfg := jobs.JobConfigurationQuery{
                DestinationTable:    req.DestinationTable,
                WriteDisposition:    req.WriteDisposition,
                SchemaUpdateOptions: req.SchemaUpdateOptions,
        }
        return AppendResults(ctx, catalog, &queryCfg, projectID, resultSchema, rows)
}

// AppendResults writes synchronous query output into
// configuration.query.destinationTable when the job requests one.
//
// It returns nil without doing anything when cfg is nil or names no
// destination table, and an error when a destination is requested but
// catalog is nil. An empty write disposition defaults to WRITE_TRUNCATE
// and an empty destination project defaults to projectID. The
// destination dataset is ensured first; empty results are then handled
// by appendEmptyQueryDestination, while non-empty results resolve the
// destination schema (applying schema update options on WRITE_APPEND)
// before the rows are inserted.
func AppendResults(ctx context.Context, catalog enginepb.CatalogClient,
        cfg *jobs.JobConfigurationQuery, projectID string,
        resultSchema *bqtypes.TableSchema, rows []bqtypes.Row,
) error {
        if cfg == nil {
                return nil
        }
        dest := cfg.DestinationTable
        if dest == nil || dest.TableID == "" {
                return nil
        }
        if catalog == nil {
                return errors.New("query destination requires Catalog client")
        }

        disposition := cfg.WriteDisposition
        if disposition == "" {
                disposition = writeTruncate
        }
        project := dest.ProjectID
        if project == "" {
                project = projectID
        }
        dataset, table := dest.DatasetID, dest.TableID

        engineRef := &enginepb.TableRef{
                ProjectId: project,
                DatasetId: dataset,
                TableId:   table,
        }
        protoResult := load.SchemaToProto(resultSchema)
        if err := load.EnsureDataset(ctx, catalog, project, dataset); err != nil {
                return err
        }

        if len(rows) == 0 {
                return appendEmptyQueryDestination(ctx, catalog, cfg, disposition, project, dataset, table,
                        engineRef, resultSchema, protoResult)
        }

        destSchema, err := resolveDestinationProtoSchema(ctx, catalog, cfg, disposition, project, dataset,
                table, engineRef, resultSchema, protoResult)
        if err != nil {
                return err
        }
        return insertDestinationRows(ctx, catalog, project, dataset, table, destSchema,
                resultSchema, rows)
}

// appendEmptyQueryDestination handles a query destination when the
// query produced no rows.
//
//   - WRITE_TRUNCATE / WRITE_EMPTY: the destination table is ensured with
//     the result schema so it exists even though nothing is inserted.
//   - WRITE_APPEND: nothing happens unless schema update options were
//     supplied; with options, the table is ensured and the schema update
//     is applied.
//   - Any other disposition is rejected with the same error the non-empty
//     path (resolveDestinationProtoSchema) returns, so an unsupported
//     writeDisposition fails regardless of how many rows the query
//     yielded.
//
// Errors from ensuring the table are wrapped with the same
// "ensure query destination table" context in every branch.
func appendEmptyQueryDestination(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        cfg *jobs.JobConfigurationQuery,
        wd, destProject, destDataset, destTable string,
        tableRef *enginepb.TableRef,
        resultSchema *bqtypes.TableSchema,
        protoResult *enginepb.TableSchema,
) error {
        switch wd {
        case writeTruncate, writeEmpty:
                if err := load.EnsureDestinationTable(ctx, catalog, destProject, destDataset, destTable,
                        wd, protoResult); err != nil {
                        return fmt.Errorf("ensure query destination table: %w", err)
                }
                return nil
        case writeAppend:
                // Appending zero rows only matters when the schema may change.
                if len(cfg.SchemaUpdateOptions) == 0 {
                        return nil
                }
                if err := load.EnsureDestinationTable(ctx, catalog, destProject, destDataset, destTable,
                        writeAppend, protoResult); err != nil {
                        return fmt.Errorf("ensure query destination table: %w", err)
                }
                _, err := load.ApplySchemaUpdate(ctx, catalog, tableRef, resultSchema, cfg.SchemaUpdateOptions)
                return err
        default:
                return fmt.Errorf("query destination writeDisposition %q is not supported", wd)
        }
}

// resolveDestinationProtoSchema ensures the destination table exists
// for the given write disposition and returns the schema to use when
// inserting rows.
//
//   - WRITE_APPEND: the table is ensured, then the schema update options
//     are applied and the schema returned by that step is used.
//   - WRITE_TRUNCATE / WRITE_EMPTY: the table is ensured and the query's
//     result schema is used.
//   - Any other disposition is an error.
//
// When the chosen schema is nil, the schema is read back from the
// catalog with DescribeTable.
func resolveDestinationProtoSchema(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        cfg *jobs.JobConfigurationQuery,
        wd, destProject, destDataset, destTable string,
        tableRef *enginepb.TableRef,
        resultSchema *bqtypes.TableSchema,
        protoResult *enginepb.TableSchema,
) (*enginepb.TableSchema, error) {
        // ensure creates/validates the destination table for a disposition
        // and wraps any failure with shared context.
        ensure := func(disposition string) error {
                if err := load.EnsureDestinationTable(ctx, catalog, destProject, destDataset, destTable,
                        disposition, protoResult); err != nil {
                        return fmt.Errorf("ensure query destination table: %w", err)
                }
                return nil
        }

        var schema *enginepb.TableSchema
        switch wd {
        case writeTruncate, writeEmpty:
                if err := ensure(wd); err != nil {
                        return nil, err
                }
                schema = protoResult
        case writeAppend:
                if err := ensure(writeAppend); err != nil {
                        return nil, err
                }
                updated, err := load.ApplySchemaUpdate(ctx, catalog, tableRef, resultSchema, cfg.SchemaUpdateOptions)
                if err != nil {
                        return nil, err
                }
                schema = updated
        default:
                return nil, fmt.Errorf("query destination writeDisposition %q is not supported", wd)
        }

        if schema != nil {
                return schema, nil
        }
        described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: tableRef})
        if err != nil {
                return nil, fmt.Errorf("describe destination table: %w", err)
        }
        return described.GetSchema(), nil
}

// insertDestinationRows converts the REST-shaped result rows to maps
// (via restRowsToMaps, using resultSchema) and inserts them into the
// destination table through a seed catalog applier, using protoSchema
// as the table schema. Insert failures are wrapped with
// "query destination insert rows".
func insertDestinationRows(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        destProject, destDataset, destTable string,
        protoSchema *enginepb.TableSchema,
        resultSchema *bqtypes.TableSchema,
        rows []bqtypes.Row,
) error {
        target := seed.TableRef{
                ProjectID: destProject,
                DatasetID: destDataset,
                TableID:   destTable,
        }
        applier := seed.NewCatalogApplier(catalog)
        payload := restRowsToMaps(resultSchema, rows)
        _, err := applier.InsertRows(ctx, target, protoSchema, payload)
        if err != nil {
                return fmt.Errorf("query destination insert rows: %w", err)
        }
        return nil
}

// PersistDestinationMetadata records the REST-only destination metadata
// a query job supplied (clustering, time partitioning, CMEK encryption
// configuration) in store, so tables.get can round-trip it.
//
// It does nothing when store or cfg is nil, when the job names no
// destination table, or when none of the three metadata fields is set.
// An empty destination project falls back to projectID.
func PersistDestinationMetadata(store MetadataStore, cfg *jobs.JobConfigurationQuery, projectID string) {
        if store == nil || cfg == nil || cfg.DestinationTable == nil {
                return
        }
        hasMetadata := cfg.Clustering != nil ||
                cfg.TimePartitioning != nil ||
                cfg.DestinationEncryptionConfiguration != nil
        if !hasMetadata {
                return
        }

        dest := cfg.DestinationTable
        project := dest.ProjectID
        if project == "" {
                project = projectID
        }
        store.MergeTable(project, dest.DatasetID, dest.TableID, bqtypes.Table{
                Clustering:              cfg.Clustering,
                TimePartitioning:        cfg.TimePartitioning,
                EncryptionConfiguration: cfg.DestinationEncryptionConfiguration,
        })
}

// MaterializeImplicitDestination registers an anonymous results table
// for SELECT jobs that omit destinationTable, so clients can read
// query_job.destination and page through list_rows.
//
// The table lives in defaultDatasetID (or implicitDestDatasetID when
// that is blank) and is named after the sanitized job ID. The dataset
// and table are ensured (WRITE_TRUNCATE), the rows are inserted, and a
// reference to the table is returned. A nil catalog, nil schema or
// empty row set is an error.
func MaterializeImplicitDestination(ctx context.Context, catalog enginepb.CatalogClient,
        projectID, defaultDatasetID, jobID string,
        resultSchema *bqtypes.TableSchema, rows []bqtypes.Row,
) (*bqtypes.TableReference, error) {
        if catalog == nil || resultSchema == nil || len(rows) == 0 {
                return nil, errors.New("implicit destination requires catalog, schema, and rows")
        }

        dataset := strings.TrimSpace(defaultDatasetID)
        if dataset == "" {
                dataset = implicitDestDatasetID
        }
        table := sanitizeJobTableID(jobID)

        if err := load.EnsureDataset(ctx, catalog, projectID, dataset); err != nil {
                return nil, err
        }
        schema := load.SchemaToProto(resultSchema)
        if err := load.EnsureDestinationTable(ctx, catalog, projectID, dataset, table,
                writeTruncate, schema); err != nil {
                return nil, err
        }

        target := seed.TableRef{ProjectID: projectID, DatasetID: dataset, TableID: table}
        applier := seed.NewCatalogApplier(catalog)
        payload := restRowsToMaps(resultSchema, rows)
        if _, err := applier.InsertRows(ctx, target, schema, payload); err != nil {
                return nil, err
        }

        result := &bqtypes.TableReference{
                ProjectID: projectID,
                DatasetID: dataset,
                TableID:   table,
        }
        return result, nil
}

// sanitizeJobTableID derives a table ID from jobID by replacing every match
// of nonIdentRE with an underscore. An empty result falls back to
// "query_results".
func sanitizeJobTableID(jobID string) string {
        if cleaned := nonIdentRE.ReplaceAllString(jobID, "_"); cleaned != "" {
                return cleaned
        }
        return "query_results"
}

// restRowsToMaps turns positional REST rows into maps keyed by the field
// names in schema. Cells are matched to fields by index; a row with fewer
// cells than the schema has fields simply omits the trailing keys. It
// returns nil when schema is nil or rows is empty.
func restRowsToMaps(schema *bqtypes.TableSchema, rows []bqtypes.Row) []map[string]any {
        if schema == nil || len(rows) == 0 {
                return nil
        }
        converted := make([]map[string]any, len(rows))
        for r := range rows {
                cells := rows[r].F
                record := make(map[string]any, len(schema.Fields))
                for col := range schema.Fields {
                        if col >= len(cells) {
                                // No more cells in this row; remaining fields stay absent.
                                break
                        }
                        field := schema.Fields[col]
                        record[field.Name] = restFieldValue(field, cells[col])
                }
                converted[r] = record
        }
        return converted
}

// restFieldValue maps a single REST query-result cell onto the value shape
// seed.InsertRows expects.
//
//   - A STRUCT/RECORD field whose cell holds a nested Row (positional f/v)
//     becomes a map keyed by the sub-field names.
//   - A REPEATED field whose cell holds a []Cell becomes a []any, each
//     element converted with the same field definition minus its mode.
//   - Anything else is passed through unchanged.
func restFieldValue(f bqtypes.TableFieldSchema, c bqtypes.Cell) any {
        switch v := c.V.(type) {
        case bqtypes.Row:
                if isRESTStructFieldType(f.Type) {
                        record := make(map[string]any, len(f.Fields))
                        for idx := range f.Fields {
                                if idx >= len(v.F) {
                                        break
                                }
                                child := f.Fields[idx]
                                record[child.Name] = restFieldValue(child, v.F[idx])
                        }
                        return record
                }
        case []bqtypes.Cell:
                if strings.EqualFold(f.Mode, "REPEATED") {
                        // Elements share the field definition but are not
                        // themselves repeated.
                        element := f
                        element.Mode = ""
                        items := make([]any, 0, len(v))
                        for _, cell := range v {
                                items = append(items, restFieldValue(element, cell))
                        }
                        return items
                }
        }
        return c.V
}

// isRESTStructFieldType reports whether the REST field type t names a
// nested record, i.e. equals "STRUCT" or "RECORD" after trimming surrounding
// whitespace and upper-casing.
func isRESTStructFieldType(t string) bool {
        normalized := strings.ToUpper(strings.TrimSpace(t))
        return normalized == "STRUCT" || normalized == "RECORD"
}

package query

import (
        "context"
        "errors"
        "fmt"
        "regexp"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/jobs"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
)

// infoSchemaJobsFromRE matches, case-insensitively, a backtick-quoted table
// path that has at least one quoted qualifier followed by a dot and then a
// quoted segment ending in INFORMATION_SCHEMA.JOBS or
// INFORMATION_SCHEMA.JOBS_BY_PROJECT, for example
// `region-us`.`INFORMATION_SCHEMA.JOBS`. The closing backtick is optional.
var infoSchemaJobsFromRE = regexp.MustCompile(
        "(?i)`(?:[^`]+`\\.)+`[^`]*INFORMATION_SCHEMA\\.(?:JOBS_BY_PROJECT|JOBS)`?")

// ReferencesInfoSchemaJobs reports whether sql contains a reference to the
// JOBS / JOBS_BY_PROJECT views in the quoted form infoSchemaJobsFromRE
// recognizes.
func ReferencesInfoSchemaJobs(sql string) bool {
        return infoSchemaJobsFromRE.FindStringIndex(sql) != nil
}

// RewriteInfoSchemaJobsSQL substitutes every INFORMATION_SCHEMA.JOBS* table
// reference matched by infoSchemaJobsFromRE with the backtick-quoted
// project.dataset.table path of the gateway's materialized jobs table for
// projectID. SQL without such references is returned unchanged.
func RewriteInfoSchemaJobsSQL(sql, projectID string) string {
        const refFormat = "`%s`.`%s`.`%s`"
        snapshotRef := fmt.Sprintf(refFormat, projectID, jobs.InfoSchemaJobsDataset, jobs.InfoSchemaJobsTable)
        return infoSchemaJobsFromRE.ReplaceAllString(sql, snapshotRef)
}

// PrepareInfoSchemaJobsSnapshot rebuilds the internal jobs table for
// projectID so a rewritten INFORMATION_SCHEMA.JOBS query can be forwarded to
// the engine.
//
// Steps, in order: ensure the internal dataset exists, drop the jobs table
// if DescribeTable succeeds for it, recreate it with the current schema, and
// insert the rows derived from reg. An empty registry snapshot leaves the
// freshly created table empty. A nil catalog or registry is rejected.
func PrepareInfoSchemaJobsSnapshot(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        reg *jobs.Registry,
        projectID string,
) error {
        switch {
        case catalog == nil:
                return errors.New("info schema jobs: engine catalog required")
        case reg == nil:
                return errors.New("info schema jobs: job registry required")
        }

        applier := seed.NewCatalogApplier(catalog)
        _, err := applier.EnsureDataset(ctx, projectID, jobs.InfoSchemaJobsDataset, "US")
        if err != nil {
                return err
        }

        seedRef := seed.TableRef{
                ProjectID: projectID,
                DatasetID: jobs.InfoSchemaJobsDataset,
                TableID:   jobs.InfoSchemaJobsTable,
        }
        schema := jobs.InfoSchemaJobsSchema()
        engineRef := &enginepb.TableRef{
                ProjectId: projectID,
                DatasetId: jobs.InfoSchemaJobsDataset,
                TableId:   jobs.InfoSchemaJobsTable,
        }

        // A successful describe means a previous snapshot exists; drop it so
        // the table is rebuilt from scratch. Describe errors are treated as
        // "table absent".
        _, describeErr := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: engineRef})
        if describeErr == nil {
                _, dropErr := catalog.DropTable(ctx, &enginepb.DropTableRequest{Table: engineRef})
                if dropErr != nil {
                        return fmt.Errorf("info schema jobs drop: %w", dropErr)
                }
        }

        if _, err := applier.EnsureTable(ctx, seedRef, schema); err != nil {
                return err
        }

        snapshot := jobs.InfoSchemaJobRows(reg, projectID)
        if len(snapshot) > 0 {
                if _, err := applier.InsertRows(ctx, seedRef, schema, snapshot); err != nil {
                        return fmt.Errorf("info schema jobs insert: %w", err)
                }
        }
        return nil
}

// PrepareEngineSQLForJobs prepares sql for the engine when it reads from
// INFORMATION_SCHEMA.JOBS*: the snapshot table is refreshed and the
// references are rewritten to point at it. SQL that does not reference the
// jobs views is returned untouched. A snapshot failure yields an empty
// string and the error.
func PrepareEngineSQLForJobs(
        ctx context.Context,
        catalog enginepb.CatalogClient,
        reg *jobs.Registry,
        projectID, sql string,
) (string, error) {
        if ReferencesInfoSchemaJobs(sql) {
                err := PrepareInfoSchemaJobsSnapshot(ctx, catalog, reg, projectID)
                if err != nil {
                        return "", err
                }
                return RewriteInfoSchemaJobsSQL(sql, projectID), nil
        }
        return sql, nil
}

package query

import (
        "errors"
        "fmt"
        "regexp"
        "strings"
)

// legacyBracketTableRE matches legacy SQL table references of the form
// [project:dataset.table] used by thirdparty Node/Python samples.
// Capture groups: 1 = project (letters, digits, '_' or '-'),
// 2 = dataset, 3 = table (letters, digits or '_').
var legacyBracketTableRE = regexp.MustCompile(
        `\[([a-zA-Z0-9_-]+):([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)\]`)

// legacyBracketDecoratorRE matches legacy snapshot decorators
// [project:dataset.table@epoch] or [project:dataset.table@-offset].
// Capture groups 1-3 are project, dataset and table; group 4 is the
// optionally negative integer that follows '@'.
var legacyBracketDecoratorRE = regexp.MustCompile(
        `\[([a-zA-Z0-9_-]+):([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)@(-?[0-9]+)\]`)

// legacyBareTableRE matches [dataset.table] when no project is given.
// Capture groups: 1 = dataset, 2 = table.
var legacyBareTableRE = regexp.MustCompile(`\[([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)\]`)

// legacyBareDecoratorRE matches [dataset.table@epoch] without a project.
// Capture groups: 1 = dataset, 2 = table, 3 = the optionally negative
// integer that follows '@'.
var legacyBareDecoratorRE = regexp.MustCompile(
        `\[([a-zA-Z0-9_]+)\.([a-zA-Z0-9_]+)@(-?[0-9]+)\]`)

// PrepareEngineSQL translates limited legacy SQL to GoogleSQL when
// useLegacy is true. The engine only accepts GoogleSQL; callers must
// clear UseLegacySql on the forwarded enginepb.QueryRequest.
func PrepareEngineSQL(useLegacy bool, sql, projectID, defaultDataset string) (string, error) {
        if useLegacy {
                normalized, err := NormalizeLegacySQL(sql, projectID, defaultDataset)
                if err != nil {
                        return "", err
                }
                return LowerTableDecorators(normalized)
        }
        return LowerTableDecorators(sql)
}

// NormalizeLegacySQL rewrites bracket-style legacy table references into
// GoogleSQL backtick form. Only the table-reference syntax is translated;
// the full legacy SQL dialect is not supported.
//
// Rewrites are applied in this order:
//  1. [project:dataset.table@N] -> `project.dataset.table` FOR SYSTEM_TIME AS OF ...
//  2. [project:dataset.table]   -> `project.dataset.table`
//  3. [dataset.table@N]         -> qualified with projectID, plus FOR SYSTEM_TIME
//  4. [dataset.table]           -> qualified with projectID
//
// A decorator whose value cannot be resolved is left in place as written.
// It fails for blank SQL, for a decorator combined with FOR SYSTEM_TIME AS
// OF, and when a bare reference is present but projectID is blank.
// defaultDataset is accepted but not used yet.
func NormalizeLegacySQL(sql, projectID, defaultDataset string) (string, error) {
        _ = defaultDataset // reserved for future bare-table defaulting

        if strings.TrimSpace(sql) == "" {
                return "", errors.New("legacy SQL query is empty")
        }
        if hasDecoratorConflict(sql) {
                return "", errors.New(
                        "cannot use table decorator with FOR SYSTEM_TIME AS OF")
        }

        rewriteDecorator := func(ref string) string {
                groups := legacyBracketDecoratorRE.FindStringSubmatch(ref)
                if len(groups) != 5 {
                        return ref
                }
                epoch, err := resolveDecoratorEpoch(groups[4])
                if err != nil {
                        return ref
                }
                return fmt.Sprintf("`%s.%s.%s` FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)",
                        groups[1], groups[2], groups[3], epoch)
        }
        rewriteTable := func(ref string) string {
                groups := legacyBracketTableRE.FindStringSubmatch(ref)
                if len(groups) != 4 {
                        return ref
                }
                return fmt.Sprintf("`%s.%s.%s`", groups[1], groups[2], groups[3])
        }

        normalized := legacyBracketDecoratorRE.ReplaceAllStringFunc(sql, rewriteDecorator)
        normalized = legacyBracketTableRE.ReplaceAllStringFunc(normalized, rewriteTable)
        if legacyBracketTableRE.MatchString(normalized) {
                return "", errors.New("legacy SQL contains unsupported table reference syntax")
        }

        var err error
        if legacyBareDecoratorRE.MatchString(normalized) {
                normalized, err = normalizeLegacyBareDecorators(normalized, projectID)
                if err != nil {
                        return "", err
                }
        }
        if legacyBareTableRE.MatchString(normalized) {
                normalized, err = normalizeLegacyBareTables(normalized, projectID)
                if err != nil {
                        return "", err
                }
        }
        return normalized, nil
}

// normalizeLegacyBareDecorators rewrites every [dataset.table@N] reference
// in out to `project.dataset.table` FOR SYSTEM_TIME AS OF
// TIMESTAMP_MILLIS(...), qualifying it with the trimmed projectID. A
// decorator whose value cannot be resolved is left as written. A blank
// projectID is an error.
func normalizeLegacyBareDecorators(out, projectID string) (string, error) {
        owner := strings.TrimSpace(projectID)
        if owner == "" {
                return "", errors.New("legacy SQL [dataset.table@epoch] requires a project context")
        }
        rewrite := func(ref string) string {
                groups := legacyBareDecoratorRE.FindStringSubmatch(ref)
                if len(groups) != 4 {
                        return ref
                }
                if epoch, err := resolveDecoratorEpoch(groups[3]); err == nil {
                        return fmt.Sprintf("`%s.%s.%s` FOR SYSTEM_TIME AS OF TIMESTAMP_MILLIS(%d)",
                                owner, groups[1], groups[2], epoch)
                }
                return ref
        }
        return legacyBareDecoratorRE.ReplaceAllStringFunc(out, rewrite), nil
}

// normalizeLegacyBareTables rewrites every [dataset.table] reference in out
// to `project.dataset.table`, qualifying it with the trimmed projectID.
// A blank projectID is an error.
func normalizeLegacyBareTables(out, projectID string) (string, error) {
        owner := strings.TrimSpace(projectID)
        if owner == "" {
                return "", errors.New("legacy SQL [dataset.table] requires a project context")
        }
        qualify := func(ref string) string {
                groups := legacyBareTableRE.FindStringSubmatch(ref)
                if len(groups) == 3 {
                        return fmt.Sprintf("`%s.%s.%s`", owner, groups[1], groups[2])
                }
                return ref
        }
        return legacyBareTableRE.ReplaceAllStringFunc(out, qualify), nil
}

package routines

import (
        "strings"
        "unicode"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// String constants shared by the routine DDL parser and renderer.
const (
        // Routine type labels: assigned to a parsed routine according to the
        // CREATE header and switched on when rendering DDL from a Routine.
        routineTypeScalarFunction = "SCALAR_FUNCTION"
        routineTypeTableFunction  = "TABLE_VALUED_FUNCTION"
        routineTypeProcedure      = "PROCEDURE"
        // routineLanguageSQL is the language assumed when the DDL names no
        // other language; it is also the one language the scalar-function
        // renderer does not spell out in a LANGUAGE clause.
        routineLanguageSQL        = "SQL"
        // Type-kind keywords used for parsed ARRAY<...> and STRUCT<...> types.
        sqlTypeArray              = "ARRAY"
        sqlTypeStruct             = "STRUCT"
        // sqlTypeAnyType is the "ANY TYPE" keyword (presumably the type kind
        // of templated arguments — confirm against its users).
        sqlTypeAnyType            = "ANY TYPE"
)

// RegisterFromDDL parses a CREATE FUNCTION / CREATE PROCEDURE statement and,
// when it parses, upserts the resulting routine into store. It returns a
// pointer to a copy of the routine's reference on success and nil when sql
// is not a recognizable CREATE routine statement.
func RegisterFromDDL(store *Store, projectID, defaultDatasetID, sql string) *bqtypes.RoutineReference {
        parsed, ok := parseCreateRoutineDDL(projectID, defaultDatasetID, sql)
        if ok {
                store.Upsert(parsed)
                registered := parsed.RoutineReference
                return &registered
        }
        return nil
}

// ParseCreateRoutineDDL is the exported entry point to the CREATE FUNCTION /
// PROCEDURE parser. It returns a Routine snapshot (used by REST/catalog
// round-trip helpers) and whether sql could be parsed.
func ParseCreateRoutineDDL(projectID, defaultDatasetID, sql string) (bqtypes.Routine, bool) {
        routine, parsed := parseCreateRoutineDDL(projectID, defaultDatasetID, sql)
        return routine, parsed
}

// routineLanguageFromDDL infers the routine language from the DDL text by
// looking, case-insensitively, for "LANGUAGE PYTHON" and then "LANGUAGE JS"
// anywhere in sql. When neither occurs the routine is treated as SQL.
func routineLanguageFromDDL(sql string) bqtypes.RoutineLanguage {
        normalized := strings.ToUpper(sql)
        if strings.Contains(normalized, "LANGUAGE PYTHON") {
                return bqtypes.RoutineLanguage("PYTHON")
        }
        if strings.Contains(normalized, "LANGUAGE JS") {
                return bqtypes.RoutineLanguage("JS")
        }
        return routineLanguageSQL
}

// parseCreateRoutineDDL converts a CREATE FUNCTION / TABLE FUNCTION /
// PROCEDURE statement into a Routine.
//
// The statement is consumed left to right: CREATE header, routine name,
// then the signature (arguments, optional RETURNS, body). The name is
// resolved against projectID and defaultDatasetID. Creation and
// modification times are both set to the current time and a fresh etag is
// minted. Python options are attached when the DDL carries any.
// The second result is false when any stage fails to parse.
func parseCreateRoutineDDL(projectID, defaultDatasetID, sql string) (bqtypes.Routine, bool) {
        afterHeader, kind, ok := stripCreateRoutineHeader(sql)
        if !ok {
                return bqtypes.Routine{}, false
        }
        qualified, afterName, ok := parseQuotedName(afterHeader)
        if !ok {
                return bqtypes.Routine{}, false
        }
        project, dataset, routineID := splitRoutineName(projectID, defaultDatasetID, qualified)
        arguments, returns, definition, ok := parseRoutineSignature(afterName)
        if !ok {
                return bqtypes.Routine{}, false
        }

        stamp := nowMillis()
        routine := bqtypes.Routine{
                Etag: MintEtag(),
                RoutineReference: bqtypes.RoutineReference{
                        ProjectID: project,
                        DatasetID: dataset,
                        RoutineID: routineID,
                },
                RoutineType:      bqtypes.RoutineType(kind),
                Language:         routineLanguageFromDDL(sql),
                Arguments:        arguments,
                ReturnType:       returns,
                DefinitionBody:   definition,
                CreationTime:     stamp,
                LastModifiedTime: stamp,
        }
        pythonOpts := parsePythonOptionsFromDDL(sql)
        if pythonOpts != nil {
                routine.PythonOptions = pythonOpts
        }
        return routine, true
}

func stripCreateRoutineHeader(sql string) (rest, routineType string, ok bool) {
        trimmed := strings.TrimSpace(sql)
        upper := strings.ToUpper(trimmed)
        switch {
        case strings.HasPrefix(upper, "CREATE OR REPLACE FUNCTION"),
                strings.HasPrefix(upper, "CREATE FUNCTION"):
                routineType = routineTypeScalarFunction
        case strings.HasPrefix(upper, "CREATE OR REPLACE TABLE FUNCTION"),
                strings.HasPrefix(upper, "CREATE TABLE FUNCTION"):
                routineType = routineTypeTableFunction
        case strings.HasPrefix(upper, "CREATE OR REPLACE PROCEDURE"),
                strings.HasPrefix(upper, "CREATE PROCEDURE"):
                routineType = routineTypeProcedure
        default:
                return "", "", false
        }
        rest = trimmed
        for _, prefix := range []string{
                "CREATE OR REPLACE TABLE FUNCTION",
                "CREATE TABLE FUNCTION",
                "CREATE OR REPLACE FUNCTION",
                "CREATE FUNCTION",
                "CREATE OR REPLACE PROCEDURE",
                "CREATE PROCEDURE",
        } {
                if len(rest) >= len(prefix) && strings.EqualFold(rest[:len(prefix)], prefix) {
                        return strings.TrimSpace(rest[len(prefix):]), routineType, true
                }
        }
        return "", "", false
}

// skipLanguageAndOptions drops any leading run of LANGUAGE <name> and
// OPTIONS (...) clauses from rest and returns what follows, trimmed.
//
// An OPTIONS keyword that is not followed by '(' is still consumed, and
// scanning stops there. If the OPTIONS parentheses are unbalanced the
// result is empty.
func skipLanguageAndOptions(rest string) string {
        remaining := strings.TrimSpace(rest)
        for {
                folded := strings.ToUpper(remaining)
                switch {
                case strings.HasPrefix(folded, "LANGUAGE"):
                        remaining = strings.TrimSpace(remaining[len("LANGUAGE"):])
                        // Step over the language name, i.e. up to the next
                        // whitespace byte.
                        cut := 0
                        for cut < len(remaining) && !unicode.IsSpace(rune(remaining[cut])) {
                                cut++
                        }
                        remaining = strings.TrimSpace(remaining[cut:])
                case strings.HasPrefix(folded, "OPTIONS"):
                        remaining = strings.TrimSpace(remaining[len("OPTIONS"):])
                        if !strings.HasPrefix(remaining, "(") {
                                return remaining
                        }
                        _, remaining, _ = scanBalanced(remaining, '(', ')')
                        remaining = strings.TrimSpace(remaining)
                default:
                        return remaining
                }
        }
}

// parseRoutineSignature parses everything after the routine name:
//
//      (args) [RETURNS type] [LANGUAGE ... | OPTIONS (...)]* AS body
//
// The argument list is parsed best-effort: if it cannot be understood, args
// is nil but parsing continues. ok is false when the parenthesized argument
// list, a present RETURNS type, the AS keyword, or a non-empty body is
// missing.
func parseRoutineSignature(rest string) (args []bqtypes.RoutineArgument,
        returnType *bqtypes.StandardSqlDataType, body string, ok bool,
) {
        if !strings.HasPrefix(rest, "(") {
                return nil, nil, "", false
        }
        paramText, tail, balanced := scanBalanced(rest, '(', ')')
        if !balanced {
                return nil, nil, "", false
        }
        // Argument parse failures are tolerated on purpose.
        parsedArgs, _ := parseArgumentList(strings.TrimSpace(paramText))

        tail = strings.TrimSpace(tail)
        var declared *bqtypes.StandardSqlDataType
        if strings.HasPrefix(strings.ToUpper(tail), "RETURNS") {
                tail = strings.TrimSpace(tail[len("RETURNS"):])
                typ, used, typed := scanSQLType(tail)
                if !typed {
                        return nil, nil, "", false
                }
                declared = typ
                tail = strings.TrimSpace(tail[used:])
        }

        tail = strings.TrimSpace(skipLanguageAndOptions(tail))
        if !strings.HasPrefix(strings.ToUpper(tail), "AS") {
                return nil, nil, "", false
        }
        definition, parsed := parseDefinitionBody(strings.TrimSpace(tail[len("AS"):]))
        if !parsed || definition == "" {
                return nil, nil, "", false
        }
        return parsedArgs, declared, definition, true
}

// parseQuotedName reads a routine name from the start of s and returns it
// together with the trimmed remainder of s.
//
// The name is a dot-separated path whose components may each be
// backtick-quoted or bare, so all of these are accepted:
//
//      `proj.ds.fn`      -> proj.ds.fn
//      `proj`.`ds`.`fn`  -> proj.ds.fn
//      ds.`fn`           -> ds.fn
//      ds.fn             -> ds.fn
//
// Backticks are stripped and the components are joined with '.'. A bare
// component ends at whitespace, '(', '.', or a backtick; it is scanned rune
// by rune so multi-byte characters are never split. ok is false when s is
// empty, a backtick is unterminated, or a component is empty and unquoted.
func parseQuotedName(s string) (name, rest string, ok bool) {
        s = strings.TrimSpace(s)
        var parts []string
        for {
                var part string
                if strings.HasPrefix(s, "`") {
                        end := strings.IndexByte(s[1:], '`')
                        if end < 0 {
                                return "", "", false
                        }
                        part, s = s[1:end+1], s[end+2:]
                } else {
                        end := strings.IndexFunc(s, func(r rune) bool {
                                return unicode.IsSpace(r) || r == '(' || r == '.' || r == '`'
                        })
                        if end < 0 {
                                end = len(s)
                        }
                        if end == 0 {
                                return "", "", false
                        }
                        part, s = s[:end], s[end:]
                }
                parts = append(parts, part)
                // A dot directly after a component introduces the next one.
                if !strings.HasPrefix(s, ".") {
                        break
                }
                s = s[1:]
        }
        return strings.Join(parts, "."), strings.TrimSpace(s), true
}

// splitRoutineName resolves a possibly qualified routine name into its
// project, dataset, and routine IDs.
//
//   - "fn"          -> (projectID, defaultDatasetID, "fn")
//   - "ds.fn"       -> (projectID, "ds", "fn")
//   - "proj.ds.fn"  -> ("proj", "ds", "fn")
//
// With more than three dot-separated components, the last two are the
// dataset and routine and everything before them is the project, so a
// project ID that itself contains dots (e.g. "example.com:proj") is kept
// whole instead of being truncated.
func splitRoutineName(projectID, defaultDatasetID, name string) (project, dataset, routine string) {
        parts := strings.Split(name, ".")
        n := len(parts)
        switch n {
        case 1:
                return projectID, defaultDatasetID, parts[0]
        case 2:
                return projectID, parts[0], parts[1]
        default:
                return strings.Join(parts[:n-2], "."), parts[n-2], parts[n-1]
        }
}

// scanBalanced extracts the text enclosed by the balanced open/close pair
// that starts at s[0]. It returns the text between the outermost delimiters
// (inner), the trimmed text after the closing delimiter (rest), and whether
// a matching close was found. s must begin with open.
//
// Only the open and close bytes affect nesting. In particular '<' and '>'
// are treated as ordinary characters, so a comparison such as "(x < 5)" is
// scanned correctly. Delimiters inside string literals are not special-cased.
func scanBalanced(s string, open, close byte) (inner, rest string, ok bool) {
        if len(s) == 0 || s[0] != open {
                return "", "", false
        }
        depth := 0
        for i := 0; i < len(s); i++ {
                switch s[i] {
                case open:
                        depth++
                case close:
                        depth--
                        if depth == 0 {
                                return s[1:i], strings.TrimSpace(s[i+1:]), true
                        }
                }
        }
        return "", "", false
}

// parseArgumentList parses a comma-separated "name TYPE" list (the text
// between the parentheses of a routine signature) into routine arguments.
//
// Each entry is an identifier made of letters, digits, and underscores
// followed by a type understood by scanSQLType. An empty list yields
// (nil, true); a trailing comma is tolerated. Anything else — a missing
// name, an unparsable type, or text other than ',' after a type — yields
// (nil, false).
func parseArgumentList(raw string) ([]bqtypes.RoutineArgument, bool) {
        if raw == "" {
                return nil, true
        }
        isIdentByte := func(b byte) bool {
                r := rune(b)
                return unicode.IsLetter(r) || unicode.IsDigit(r) || b == '_'
        }

        var parsed []bqtypes.RoutineArgument
        pending := raw
        for {
                pending = strings.TrimSpace(pending)
                if pending == "" {
                        return parsed, true
                }

                width := 0
                for width < len(pending) && isIdentByte(pending[width]) {
                        width++
                }
                if width == 0 {
                        return nil, false
                }
                argName := pending[:width]

                pending = strings.TrimSpace(pending[width:])
                dataType, used, ok := scanSQLType(pending)
                if !ok {
                        return nil, false
                }
                parsed = append(parsed, bqtypes.RoutineArgument{
                        Name:     argName,
                        DataType: dataType,
                })

                pending = strings.TrimSpace(pending[used:])
                if pending == "" {
                        return parsed, true
                }
                if pending[0] != ',' {
                        return nil, false
                }
                pending = pending[1:]
        }
}

// scanSQLType parses one SQL type from the start of s and reports how many
// bytes of s it consumed.
//
// Recognized forms:
//   - ANY TYPE
//   - ARRAY<elem>, with elem parsed recursively
//   - STRUCT<name TYPE, ...>, with fields parsed by parseStructFields
//   - a bare type name made of letters, digits, and underscores, returned
//     upper-cased as the type kind
//
// Leading whitespace in s is skipped and counted in the returned offset, so
// s[consumed:] is always the text that follows the type. The final result
// is false when s is blank, an angle bracket is unclosed, or a nested type
// or field list cannot be parsed.
func scanSQLType(s string) (*bqtypes.StandardSqlDataType, int, bool) {
        // Bytes of leading whitespace dropped below; added back to every
        // returned offset so it stays relative to the caller's string.
        lead := len(s) - len(strings.TrimLeftFunc(s, unicode.IsSpace))
        s = strings.TrimSpace(s)
        if s == "" {
                return nil, 0, false
        }
        upper := strings.ToUpper(s)
        switch {
        case strings.HasPrefix(upper, sqlTypeAnyType):
                return &bqtypes.StandardSqlDataType{
                        TypeKind: bqtypes.SQLTypeKind(sqlTypeAnyType),
                }, lead + len(sqlTypeAnyType), true
        case strings.HasPrefix(upper, "ARRAY<"):
                inner, consumed, ok := scanAngleInner(s[len("ARRAY<"):])
                if !ok {
                        return nil, 0, false
                }
                elem, _, ok := scanSQLType(inner)
                if !ok {
                        return nil, 0, false
                }
                total := lead + len("ARRAY<") + consumed
                return &bqtypes.StandardSqlDataType{
                        TypeKind:         bqtypes.SQLTypeKind(sqlTypeArray),
                        ArrayElementType: elem,
                }, total, true
        case strings.HasPrefix(upper, "STRUCT<"):
                inner, consumed, ok := scanAngleInner(s[len("STRUCT<"):])
                if !ok {
                        return nil, 0, false
                }
                fields, ok := parseStructFields(inner)
                if !ok {
                        return nil, 0, false
                }
                total := lead + len("STRUCT<") + consumed
                return &bqtypes.StandardSqlDataType{
                        TypeKind: bqtypes.SQLTypeKind(sqlTypeStruct),
                        StructType: &bqtypes.StandardSqlStructType{
                                Fields: fields,
                        },
                }, total, true
        default:
                end := 0
                for end < len(s) && (unicode.IsLetter(rune(s[end])) ||
                        unicode.IsDigit(rune(s[end])) || s[end] == '_') {
                        end++
                }
                if end == 0 {
                        return nil, 0, false
                }
                return &bqtypes.StandardSqlDataType{
                        TypeKind: bqtypes.SQLTypeKind(strings.ToUpper(s[:end])),
                }, lead + end, true
        }
}

// scanAngleInner scans s, which is the text immediately after an opening
// '<', for the '>' that closes it, honoring nested angle brackets. It
// returns the text before that '>' and the number of bytes consumed
// including the '>'. ok is false when the bracket is never closed.
func scanAngleInner(s string) (inner string, consumed int, ok bool) {
        open := 1 // the caller has already consumed one '<'
        for pos := 0; pos < len(s); pos++ {
                if s[pos] == '<' {
                        open++
                        continue
                }
                if s[pos] != '>' {
                        continue
                }
                open--
                if open == 0 {
                        return s[:pos], pos + 1, true
                }
        }
        return "", 0, false
}

// parseStructFields parses the comma-separated "name TYPE" list found
// inside STRUCT<...> into struct fields.
//
// Field names consist of letters, digits, and underscores; types are parsed
// by scanSQLType. A blank list yields (nil, true) and a trailing comma is
// tolerated. A missing name, an unparsable type, or text other than ','
// after a type yields (nil, false).
func parseStructFields(raw string) ([]bqtypes.StandardSqlField, bool) {
        pending := strings.TrimSpace(raw)
        if pending == "" {
                return nil, true
        }
        isIdentByte := func(b byte) bool {
                r := rune(b)
                return unicode.IsLetter(r) || unicode.IsDigit(r) || b == '_'
        }

        var fields []bqtypes.StandardSqlField
        for pending != "" {
                width := 0
                for width < len(pending) && isIdentByte(pending[width]) {
                        width++
                }
                if width == 0 {
                        return nil, false
                }
                fieldName := pending[:width]

                pending = strings.TrimSpace(pending[width:])
                fieldType, used, ok := scanSQLType(pending)
                if !ok {
                        return nil, false
                }
                fields = append(fields, bqtypes.StandardSqlField{
                        Name: fieldName,
                        Type: *fieldType,
                })

                pending = strings.TrimSpace(pending[used:])
                if pending == "" {
                        break
                }
                if pending[0] != ',' {
                        return nil, false
                }
                pending = strings.TrimSpace(pending[1:])
        }
        return fields, true
}

// parseDefinitionBody extracts the routine body that follows the AS keyword.
//
// Supported shapes:
//   - "( ... )": the trimmed text inside the balanced parentheses.
//   - triple-quoted strings ('''...''' or """..."""), optionally prefixed
//     with r/R: the text between the fences, verbatim.
//   - single- or double-quoted strings: the decoded contents. In a raw
//     string (r'...' / R"...") backslashes are kept exactly as written; in
//     a regular string the common single-character escapes (\n, \t, \r, \a,
//     \b, \f, \v) are decoded and any other escaped character stands for
//     itself.
//   - anything else: the remaining text as-is (e.g. BEGIN ... END).
//
// The result is false when s is blank or a quote/parenthesis is never closed.
func parseDefinitionBody(s string) (string, bool) {
        s = strings.TrimSpace(s)
        if len(s) == 0 {
                return "", false
        }
        if s[0] == '(' {
                inner, _, ok := scanBalanced(s, '(', ')')
                return strings.TrimSpace(inner), ok
        }
        // Language-specific quoted bodies (JavaScript / Python UDFs) — take
        // the first quoted span.
        raw := false
        if len(s) > 1 && (s[0] == 'r' || s[0] == 'R') && (s[1] == '\'' || s[1] == '"') {
                raw = true
                s = s[1:]
        }
        if len(s) >= 3 && (s[0] == '\'' || s[0] == '"') && s[0] == s[1] && s[1] == s[2] {
                fence := s[:3]
                end := strings.Index(s[3:], fence)
                if end < 0 {
                        return "", false
                }
                return s[3 : 3+end], true
        }
        if s[0] == '\'' || s[0] == '"' {
                return unquoteDefinitionBody(s, raw)
        }
        return s, true
}

// unquoteDefinitionBody returns the contents of the quoted string literal at
// the start of s (s[0] is the quote character). When raw is true escape
// sequences are preserved verbatim; otherwise they are decoded.
func unquoteDefinitionBody(s string, raw bool) (string, bool) {
        quote := s[0]
        var b strings.Builder
        for i := 1; i < len(s); i++ {
                c := s[i]
                switch {
                case c == '\\' && i+1 < len(s):
                        i++
                        if raw {
                                // Raw strings keep the backslash; it only
                                // prevents the next quote from terminating.
                                b.WriteByte('\\')
                                b.WriteByte(s[i])
                        } else {
                                b.WriteByte(decodeDefinitionEscape(s[i]))
                        }
                case c == quote:
                        return b.String(), true
                default:
                        b.WriteByte(c)
                }
        }
        return "", false
}

// decodeDefinitionEscape maps the character following a backslash to the
// byte it denotes. Characters without a special meaning (quotes, backslash,
// and anything unrecognized) stand for themselves.
func decodeDefinitionEscape(c byte) byte {
        switch c {
        case 'n':
                return '\n'
        case 't':
                return '\t'
        case 'r':
                return '\r'
        case 'a':
                return '\a'
        case 'b':
                return '\b'
        case 'f':
                return '\f'
        case 'v':
                return '\v'
        default:
                return c
        }
}

package routines

import (
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// parsePythonOptionsFromDDL extracts the entry_point and packages settings
// from the first OPTIONS (...) clause in sql.
//
// The OPTIONS keyword is located case-insensitively on the original text, so
// the byte offset is always valid for sql. The clause is split on top-level
// commas only: commas inside [...] / (...) or inside quoted strings belong
// to the value, which lets packages=['a', 'b'] parse as one option. Keys are
// matched case-insensitively; unknown keys are ignored.
//
// It returns nil when there is no OPTIONS keyword, when the first one is not
// followed by a balanced parenthesized list, or when neither entry_point nor
// packages is set.
func parsePythonOptionsFromDDL(sql string) *bqtypes.PythonOptions {
        const keyword = "OPTIONS"
        pos := -1
        for i := 0; i+len(keyword) <= len(sql); i++ {
                if strings.EqualFold(sql[i:i+len(keyword)], keyword) {
                        pos = i
                        break
                }
        }
        if pos < 0 {
                return nil
        }
        rest := strings.TrimSpace(sql[pos+len(keyword):])
        if !strings.HasPrefix(rest, "(") {
                return nil
        }
        inner, _, ok := scanBalanced(rest, '(', ')')
        if !ok {
                return nil
        }
        opts := &bqtypes.PythonOptions{}
        for _, part := range splitTopLevelCommas(inner) {
                key, value, found := strings.Cut(strings.TrimSpace(part), "=")
                if !found {
                        continue
                }
                key = strings.TrimSpace(strings.Trim(key, `"'`))
                value = strings.TrimSpace(value)
                switch strings.ToUpper(key) {
                case "ENTRY_POINT":
                        opts.EntryPoint = parseOptionStringLiteral(value)
                case "PACKAGES":
                        opts.Packages = parseOptionStringArray(value)
                }
        }
        if opts.EntryPoint == "" && len(opts.Packages) == 0 {
                return nil
        }
        return opts
}

// splitTopLevelCommas splits s on commas that are not nested inside
// brackets or parentheses and not inside a single- or double-quoted string.
func splitTopLevelCommas(s string) []string {
        var parts []string
        var quote byte // active quote character, 0 when outside a string
        depth, start := 0, 0
        for i := 0; i < len(s); i++ {
                c := s[i]
                switch {
                case quote != 0:
                        if c == '\\' {
                                i++ // skip the escaped character
                        } else if c == quote {
                                quote = 0
                        }
                case c == '\'' || c == '"':
                        quote = c
                case c == '[' || c == '(':
                        depth++
                case c == ']' || c == ')':
                        if depth > 0 {
                                depth--
                        }
                case c == ',' && depth == 0:
                        parts = append(parts, s[start:i])
                        start = i + 1
                }
        }
        return append(parts, s[start:])
}

// parseOptionStringLiteral returns the text of a quoted option value.
// When the trimmed value is wrapped in a matching pair of single or double
// quotes, exactly that pair is removed; otherwise any leading and trailing
// quote characters are stripped.
func parseOptionStringLiteral(value string) string {
        literal := strings.TrimSpace(value)
        for _, q := range []string{"'", `"`} {
                if len(literal) >= 2 && strings.HasPrefix(literal, q) && strings.HasSuffix(literal, q) {
                        return literal[1 : len(literal)-1]
                }
        }
        return strings.Trim(literal, `"'`)
}

// parseOptionStringArray parses a bracketed, comma-separated list of quoted
// strings such as ['a', 'b'] into its unquoted elements. Blank elements are
// skipped. It returns nil when value does not start with '[' or the
// brackets are unbalanced.
func parseOptionStringArray(value string) []string {
        trimmed := strings.TrimSpace(value)
        if !strings.HasPrefix(trimmed, "[") {
                return nil
        }
        body, _, ok := scanBalanced(trimmed, '[', ']')
        if !ok {
                return nil
        }
        var items []string
        for _, raw := range strings.Split(body, ",") {
                if elem := strings.TrimSpace(raw); elem != "" {
                        items = append(items, parseOptionStringLiteral(elem))
                }
        }
        return items
}

package routines

import (
        "fmt"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// BuildDDLFromRoutine renders a CREATE OR REPLACE statement for a REST
// Routine resource, suitable for engine registration and DuckDB
// persistence. The routine is addressed as `dataset.routine`; table
// functions and procedures get their own renderers and every other routine
// type is rendered as a scalar function.
func BuildDDLFromRoutine(rt bqtypes.Routine) string {
        qualified := fmt.Sprintf("`%s.%s`", rt.RoutineReference.DatasetID, rt.RoutineReference.RoutineID)
        kind := string(rt.RoutineType)
        if kind == routineTypeTableFunction {
                return buildTableFunctionDDL(qualified, rt)
        }
        if kind == routineTypeProcedure {
                return buildProcedureDDL(qualified, rt)
        }
        return buildScalarFunctionDDL(qualified, rt)
}

// buildScalarFunctionDDL renders
//
//      CREATE OR REPLACE FUNCTION name(args) [RETURNS t] [LANGUAGE l]
//      [OPTIONS (...)] AS (body)
//
// RETURNS is emitted only when a return type is set, and LANGUAGE only
// for a non-empty language other than SQL (compared case-insensitively).
// A non-nil PythonOptions always produces an OPTIONS clause, even when
// it ends up empty. Option values and the definition body are
// interpolated verbatim, without any quoting or escaping.
func buildScalarFunctionDDL(name string, rt bqtypes.Routine) string {
        var ddl strings.Builder
        ddl.WriteString("CREATE OR REPLACE FUNCTION " + name + "(" + formatArgumentList(rt.Arguments) + ")")
        if rt.ReturnType != nil {
                ddl.WriteString(" RETURNS " + formatSQLType(rt.ReturnType))
        }
        if lang := string(rt.Language); lang != "" && !strings.EqualFold(lang, routineLanguageSQL) {
                ddl.WriteString(" LANGUAGE " + lang)
        }
        if py := rt.PythonOptions; py != nil {
                var clauses []string
                if py.EntryPoint != "" {
                        clauses = append(clauses, fmt.Sprintf("entry_point='%s'", py.EntryPoint))
                }
                if n := len(py.Packages); n > 0 {
                        pkgs := make([]string, n)
                        for i, pkg := range py.Packages {
                                pkgs[i] = fmt.Sprintf("'%s'", pkg)
                        }
                        clauses = append(clauses, "packages=["+strings.Join(pkgs, ", ")+"]")
                }
                ddl.WriteString(" OPTIONS (" + strings.Join(clauses, ", ") + ")")
        }
        ddl.WriteString(" AS (" + rt.DefinitionBody + ")")
        return ddl.String()
}

// buildTableFunctionDDL renders
// `CREATE OR REPLACE TABLE FUNCTION name(args) AS (body)`.
// The definition body is inserted verbatim between the parentheses.
func buildTableFunctionDDL(name string, rt bqtypes.Routine) string {
        return "CREATE OR REPLACE TABLE FUNCTION " + name +
                "(" + formatArgumentList(rt.Arguments) + ") AS (" +
                rt.DefinitionBody + ")"
}

// buildProcedureDDL renders
// `CREATE OR REPLACE PROCEDURE name(args) BEGIN body END`.
// The definition body is inserted verbatim between BEGIN and END.
func buildProcedureDDL(name string, rt bqtypes.Routine) string {
        return "CREATE OR REPLACE PROCEDURE " + name +
                "(" + formatArgumentList(rt.Arguments) + ") BEGIN " +
                rt.DefinitionBody + " END"
}

// formatArgumentList renders routine arguments as a comma-separated
// "name type" list. An argument without a data type is given the
// sqlTypeAnyType placeholder. No arguments yields the empty string.
func formatArgumentList(args []bqtypes.RoutineArgument) string {
        var list strings.Builder
        for i, arg := range args {
                if i > 0 {
                        list.WriteString(", ")
                }
                argType := sqlTypeAnyType
                if arg.DataType != nil {
                        argType = formatSQLType(arg.DataType)
                }
                fmt.Fprintf(&list, "%s %s", arg.Name, argType)
        }
        return list.String()
}

// formatSQLType renders a StandardSqlDataType as SQL type text.
// A nil type becomes sqlTypeAnyType. Arrays with an element type
// render as ARRAY<elem> and structs with a struct type render as
// STRUCT<name type, ...>, both recursively. Anything else (including
// an array or struct that lacks its nested description) is returned
// as the raw type kind.
func formatSQLType(t *bqtypes.StandardSqlDataType) string {
        if t == nil {
                return sqlTypeAnyType
        }
        kind := string(t.TypeKind)
        switch {
        case t.ArrayElementType != nil && strings.EqualFold(kind, sqlTypeArray):
                return "ARRAY<" + formatSQLType(t.ArrayElementType) + ">"
        case t.StructType != nil && strings.EqualFold(kind, sqlTypeStruct):
                members := make([]string, len(t.StructType.Fields))
                for i, f := range t.StructType.Fields {
                        members[i] = fmt.Sprintf("%s %s", f.Name, formatSQLType(&f.Type))
                }
                return "STRUCT<" + strings.Join(members, ", ") + ">"
        default:
                return kind
        }
}

// Package routines is the gateway-side in-memory registry of BigQuery
// Routine resources (UDFs, TVFs, stored procedures). REST handlers
// and DDL query jobs register routines here so client libraries can
// round-trip insert/get/list/update/delete without an engine catalog RPC.
package routines

import (
        "crypto/rand"
        "encoding/hex"
        "slices"
        "strings"
        "sync"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// Store holds routines keyed by projectId:datasetId.routineId (the
// string produced by routineKey). Its methods take the mutex
// themselves and tolerate a nil *Store receiver.
type Store struct {
        // mu guards routines; readers take RLock, writers take Lock.
        mu       sync.RWMutex
        // routines maps the routineKey string to the stored resource.
        routines map[string]bqtypes.Routine
}

// NewStore returns an empty routine registry whose map is already
// allocated, so it can be written to right away.
func NewStore() *Store {
        s := new(Store)
        s.routines = make(map[string]bqtypes.Routine)
        return s
}

// routineKey builds the registry key "project:dataset.routine" for a
// routine. The IDs are joined as-is, without validation or escaping.
func routineKey(projectID, datasetID, routineID string) string {
        key := make([]byte, 0, len(projectID)+len(datasetID)+len(routineID)+2)
        key = append(key, projectID...)
        key = append(key, ':')
        key = append(key, datasetID...)
        key = append(key, '.')
        key = append(key, routineID...)
        return string(key)
}

// Insert registers a new routine under the key derived from its
// RoutineReference. It returns false, leaving the store untouched,
// when that key is already taken or when the receiver is nil.
func (s *Store) Insert(rt bqtypes.Routine) bool {
        if s == nil {
                return false
        }
        id := rt.RoutineReference
        key := routineKey(id.ProjectID, id.DatasetID, id.RoutineID)

        s.mu.Lock()
        defer s.mu.Unlock()
        _, exists := s.routines[key]
        if !exists {
                s.routines[key] = cloneRoutine(rt)
        }
        return !exists
}

// Upsert registers or replaces a routine (CREATE OR REPLACE DDL).
// Any existing entry with the same key is overwritten. A nil receiver
// makes the call a no-op.
func (s *Store) Upsert(rt bqtypes.Routine) {
        if s == nil {
                return
        }
        id := rt.RoutineReference
        s.mu.Lock()
        defer s.mu.Unlock()
        s.routines[routineKey(id.ProjectID, id.DatasetID, id.RoutineID)] = cloneRoutine(rt)
}

// Get returns a routine snapshot and whether it was found. A miss
// (or a nil receiver) yields the zero Routine and false.
func (s *Store) Get(projectID, datasetID, routineID string) (bqtypes.Routine, bool) {
        if s == nil {
                var none bqtypes.Routine
                return none, false
        }
        key := routineKey(projectID, datasetID, routineID)
        s.mu.RLock()
        defer s.mu.RUnlock()
        stored, found := s.routines[key]
        return cloneRoutine(stored), found
}

// Delete removes a routine. It returns false when the routine is
// absent or the receiver is nil, and true when an entry was removed.
func (s *Store) Delete(projectID, datasetID, routineID string) bool {
        if s == nil {
                return false
        }
        key := routineKey(projectID, datasetID, routineID)
        s.mu.Lock()
        defer s.mu.Unlock()
        _, present := s.routines[key]
        // delete on a missing key is a no-op, so no branch is needed.
        delete(s.routines, key)
        return present
}

// List returns routines in the dataset, optionally filtered by
// routineType (filter format: routineType:SCALAR_FUNCTION). Results
// are ordered by registry key. With a non-nil receiver the result is
// always a non-nil slice, even when nothing matches.
func (s *Store) List(projectID, datasetID, filter string) []bqtypes.Routine {
        if s == nil {
                return nil
        }
        typeFilter := parseRoutineTypeFilter(filter)
        datasetPrefix := projectID + ":" + datasetID + "."

        s.mu.RLock()
        defer s.mu.RUnlock()

        // Collect the keys that survive both the dataset and type
        // filters, then sort them so the listing order is stable.
        keys := make([]string, 0, len(s.routines))
        for key, rt := range s.routines {
                if !strings.HasPrefix(key, datasetPrefix) {
                        continue
                }
                if typeFilter != "" && string(rt.RoutineType) != typeFilter {
                        continue
                }
                keys = append(keys, key)
        }
        slices.Sort(keys)

        listed := make([]bqtypes.Routine, len(keys))
        for i, key := range keys {
                listed[i] = cloneRoutine(s.routines[key])
        }
        return listed
}

// parseRoutineTypeFilter extracts the routine type from a filter of
// the form "routineType:<TYPE>", trimming whitespace around <TYPE>.
// Any filter without that exact prefix (including the empty string)
// yields "", which callers treat as "no type filter".
func parseRoutineTypeFilter(filter string) string {
        rest, hasPrefix := strings.CutPrefix(filter, "routineType:")
        if !hasPrefix {
                return ""
        }
        return strings.TrimSpace(rest)
}

// cloneRoutine returns rt unchanged; passing and returning the struct
// by value is the only copying that happens here.
//
// NOTE(review): this is a shallow copy. Reference-typed members of
// bqtypes.Routine (e.g. the Arguments slice or the ReturnType pointer)
// still alias the caller's data, so stored routines are presumably not
// isolated from later mutation through those members -- confirm
// whether a deep copy is needed.
func cloneRoutine(rt bqtypes.Routine) bqtypes.Routine {
        return rt
}

// MintEtag returns a random hex etag for a routine resource: 8 bytes
// from crypto/rand rendered as 16 lowercase hex characters.
func MintEtag() string {
        raw := make([]byte, 8)
        // The error from rand.Read is deliberately discarded.
        _, _ = rand.Read(raw)
        return hex.EncodeToString(raw)
}

package routines

import (
        "strconv"
        "time"
)

// nowMillis returns the current time as a base-10 string of
// milliseconds since the Unix epoch.
func nowMillis() string {
        ms := time.Now().UTC().UnixMilli()
        return strconv.FormatInt(ms, 10)
}

package seed

import (
        "crypto/subtle"
        "net"
        "net/http"
        "strings"
)

// AccessConfig captures the safety knobs that gate the seed routes.
// The handler closes over one of these per gateway process; mutating
// the struct after registration has no effect.
type AccessConfig struct {
        // AllowRemote, when false (the default), rejects any request
        // whose RemoteAddr is not loopback (127.0.0.0/8 or ::1). The
        // rationale: a seed operation pulls down real production data
        // and writes it into a local emulator -- the call must
        // originate from the operator who owns both endpoints, not
        // from a co-tenant reachable on the LAN.
        AllowRemote bool

        // Token, when non-empty, requires every request to carry a
        // matching `X-BigQuery-Emulator-Seed-Token` header. This is
        // the additional defense for the
        // `--seed-api-allow-remote=true` case (CI runners, ephemeral
        // VMs) where loopback enforcement is not viable.
        Token string
}

// HeaderName is the canonical name of the request header that
// CheckAccess reads the seed token from. Exported so tests don't have
// to duplicate the literal.
const HeaderName = "X-BigQuery-Emulator-Seed-Token"

// CheckAccess enforces the loopback / token gates on r and returns
// nil when the request is allowed. Every denial returns
// ErrAccessDenied, which carries the 403 status the handler maps into
// the BigQuery error envelope.
//
// Order: loopback first, then token. The loopback gate is skipped
// when AllowRemote is set, and the token gate is skipped when Token
// is empty. Checking loopback first means an operator who left
// `--seed-api-allow-remote` off but also sends a token is rejected by
// the loopback gate, never the token gate.
func (c AccessConfig) CheckAccess(r *http.Request) error {
        remoteRejected := !c.AllowRemote && !isLoopback(r.RemoteAddr)
        if remoteRejected {
                return ErrAccessDenied
        }
        tokenRejected := c.Token != "" && !secureEqual(r.Header.Get(HeaderName), c.Token)
        if tokenRejected {
                return ErrAccessDenied
        }
        return nil
}

// ErrAccessDenied is the sentinel CheckAccess returns for every
// denial; it carries HTTP 403. We don't distinguish between "wrong
// remote" and "wrong token" so an attacker probing the seed endpoint
// can't tell which check fired.
var ErrAccessDenied = httpError{code: http.StatusForbidden, msg: "seed: access denied"}

// httpError carries both the HTTP status the handler must write and
// the human-readable message. It implements error, and because it is
// a plain comparable struct, equal values match under errors.Is.
type httpError struct {
        code int    // HTTP status code to respond with
        msg  string // message returned by Error
}

// Error returns the human-readable message, satisfying the error
// interface.
func (e httpError) Error() string { return e.msg }

// Status returns the HTTP status code the handler should respond
// with for this error.
func (e httpError) Status() int { return e.code }

// secureEqual reports whether a and b are identical, comparing their
// bytes in constant time via crypto/subtle.ConstantTimeCompare.
//
// Strings of different lengths are rejected immediately, so the
// comparison hides the contents of the expected token from timing
// probes but not its length. Constant-time only matters for the token
// comparison, but isolating the helper keeps the call site obvious.
func secureEqual(a, b string) bool {
        return subtle.ConstantTimeCompare([]byte(a), []byte(b)) == 1
}

// isLoopback reports whether remoteAddr (in net/http's `host:port`
// form) is on the local machine. A value that does not split into
// host and port is used whole as the host, because tests sometimes
// inject just an IP for httptest.
//
// An empty host counts as loopback, so internal callers (e.g. a
// gateway bound to a unix socket) aren't locked out. A host that is
// not a literal IP address -- including a name such as "localhost" --
// is rejected.
func isLoopback(remoteAddr string) bool {
        host := remoteAddr
        if h, _, err := net.SplitHostPort(remoteAddr); err == nil {
                host = h
        }
        host = strings.TrimSpace(host)
        if host == "" {
                return true
        }
        if ip := net.ParseIP(host); ip != nil {
                return ip.IsLoopback()
        }
        return false
}

// Package seed contains the production-side seeding orchestrator plus
// shared types the YAML seed-file loader (gateway/seedfile) reuses
// when it applies declarative data to the engine.
//
// Both code paths ultimately call into the engine's CatalogClient
// over gRPC -- the same surface the REST handlers
// (gateway/handlers/datasets.go, tables.go, tabledata.go) drive --
// so seeded state is indistinguishable from state created via the
// public REST API.
package seed

import (
        "context"
        "encoding/base64"
        "encoding/json"
        "errors"
        "fmt"
        "strconv"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// Applier is the narrow surface seeding code drives to mutate the
// emulator's catalog and rows. It is implemented on top of an
// enginepb.CatalogClient by NewCatalogApplier; tests pass a fake
// implementation so the orchestrator and YAML loader can run without
// a live engine.
type Applier interface {
        // EnsureDataset registers (project, dataset) with the engine.
        // Implementations treat an ALREADY_EXISTS response as success
        // so seeding is idempotent across reruns. The created result
        // reports whether the call actually changed engine state, so
        // callers can tally a "created vs skipped" counter in
        // orchestrator metrics.
        EnsureDataset(ctx context.Context, projectID, datasetID, location string) (created bool, err error)

        // EnsureTable registers (project, dataset, table) with the
        // given schema. Same idempotency contract as EnsureDataset:
        // ALREADY_EXISTS surfaces as `created=false, err=nil`. The
        // schema is taken at face value; callers that want to evolve
        // schemas across runs are responsible for dropping and
        // re-registering tables themselves.
        EnsureTable(ctx context.Context, ref TableRef, schema *enginepb.TableSchema) (created bool, err error)

        // InsertRows appends rows to the table named by ref in a
        // single RPC. The schema supplies the table's column order so
        // callers can pass generic map-shaped rows and the applier
        // lays cells out positionally. Returns the number of rows
        // inserted on success.
        InsertRows(ctx context.Context, ref TableRef, schema *enginepb.TableSchema, rows []map[string]any) (int, error)
}

// TableRef is the (project, dataset, table) triple the applier API
// passes around. We keep it in this package -- rather than reusing
// enginepb.TableRef directly -- so callers don't need to import the
// generated proto package just to name a destination.
type TableRef struct {
        ProjectID string // project that owns the dataset
        DatasetID string // dataset that contains the table
        TableID   string // table within the dataset
}

// catalogApplier is the production Applier implementation backed by
// the gRPC CatalogClient. Its methods return an error rather than
// panic when the applier or its client is nil.
type catalogApplier struct {
        // client is the engine catalog RPC surface every method calls.
        client enginepb.CatalogClient
}

// NewCatalogApplier wraps a CatalogClient so it satisfies Applier.
// The applier stores only the client it was given and keeps no other
// state, so handing the same CatalogClient to several appliers is
// safe.
func NewCatalogApplier(c enginepb.CatalogClient) Applier {
        applier := new(catalogApplier)
        applier.client = c
        return applier
}

// EnsureDataset wraps Catalog.RegisterDataset with idempotency: an
// ALREADY_EXISTS response is reported as (false, nil) so seed reruns
// don't fail the entire batch. A successful registration returns
// (true, nil); any other failure is wrapped with the dataset name.
// A nil applier or client yields an error without issuing an RPC.
func (a *catalogApplier) EnsureDataset(ctx context.Context, projectID, datasetID, location string) (bool, error) {
        if a == nil || a.client == nil {
                return false, errors.New("seed: nil CatalogClient; engine subprocess required to ensure dataset")
        }
        req := &enginepb.RegisterDatasetRequest{
                Dataset:  &enginepb.DatasetRef{ProjectId: projectID, DatasetId: datasetID},
                Location: location,
        }
        _, err := a.client.RegisterDataset(ctx, req)
        switch {
        case err == nil:
                return true, nil
        case isAlreadyExists(err):
                return false, nil
        default:
                return false, fmt.Errorf("RegisterDataset %s.%s: %w", projectID, datasetID, err)
        }
}

// EnsureTable wraps Catalog.RegisterTable with the same idempotency
// contract as EnsureDataset: (true, nil) when the table was
// registered, (false, nil) when the engine answered ALREADY_EXISTS,
// and a wrapped error naming the table otherwise. A nil applier or
// client yields an error without issuing an RPC.
func (a *catalogApplier) EnsureTable(ctx context.Context, ref TableRef, schema *enginepb.TableSchema) (bool, error) {
        if a == nil || a.client == nil {
                return false, errors.New("seed: nil CatalogClient; engine subprocess required to ensure table")
        }
        req := &enginepb.RegisterTableRequest{
                Table: &enginepb.TableRef{
                        ProjectId: ref.ProjectID,
                        DatasetId: ref.DatasetID,
                        TableId:   ref.TableID,
                },
                Schema: schema,
        }
        _, err := a.client.RegisterTable(ctx, req)
        switch {
        case err == nil:
                return true, nil
        case isAlreadyExists(err):
                return false, nil
        default:
                return false, fmt.Errorf("RegisterTable %s.%s.%s: %w",
                        ref.ProjectID, ref.DatasetID, ref.TableID, err)
        }
}

// InsertRows lays each map-shaped row out positionally against the
// table's schema before forwarding to Catalog.InsertRows. Missing
// columns become NULL cells so the cell count stays in sync with
// the column count Storage::AppendRows expects (mirrors the same
// rule TableDataInsertAll applies for REST inserts).
//
// An empty rows slice returns (0, nil) without issuing an RPC. On
// success the result is len(rows); on failure it is 0 and the RPC
// error wrapped with the table name and row count.
func (a *catalogApplier) InsertRows(
        ctx context.Context,
        ref TableRef,
        schema *enginepb.TableSchema,
        rows []map[string]any,
) (int, error) {
        if a == nil || a.client == nil {
                return 0, errors.New("seed: nil CatalogClient; engine subprocess required to insert rows")
        }
        count := len(rows)
        if count == 0 {
                return 0, nil
        }
        payload := make([]*enginepb.DataRow, count)
        for i := range rows {
                payload[i] = rowToProto(schema, rows[i])
        }
        req := &enginepb.InsertRowsRequest{
                Table: &enginepb.TableRef{
                        ProjectId: ref.ProjectID,
                        DatasetId: ref.DatasetID,
                        TableId:   ref.TableID,
                },
                Rows: payload,
        }
        if _, err := a.client.InsertRows(ctx, req); err != nil {
                return 0, fmt.Errorf("InsertRows %s.%s.%s (%d rows): %w",
                        ref.ProjectID, ref.DatasetID, ref.TableID, count, err)
        }
        return count, nil
}

// rowToProto lays a map-shaped row out positionally against the
// schema, mirroring jsonRowToProto in gateway/handlers/tabledata.go.
// Pulled into its own helper so both seeding paths (production
// orchestrator and YAML loader) emit the same wire shape.
//
// The result has exactly one cell per schema field, in schema order.
// A field whose name is missing from row becomes a NULL cell; keys in
// row that match no field are ignored.
func rowToProto(schema *enginepb.TableSchema, row map[string]any) *enginepb.DataRow {
        fields := schema.GetFields()
        cells := make([]*enginepb.Cell, len(fields))
        for i, field := range fields {
                if value, present := row[field.GetName()]; present {
                        cells[i] = cellFromJSONForField(field, value)
                } else {
                        cells[i] = nullCell()
                }
        }
        return &enginepb.DataRow{Cells: cells}
}

// cellFromJSONForField converts v into a Cell using the field schema
// f to place nested values positionally:
//
//   - REPEATED field with a []any value: each element is converted
//     against the element schema from repeatedElementSchema.
//   - STRUCT/RECORD field with a map[string]any value: one cell per
//     declared subfield, in schema order; absent subfields become
//     NULL cells.
//
// A nil schema, a scalar field, or a value whose Go type does not
// match the expected shape falls back to the schema-blind ValueToCell.
func cellFromJSONForField(f *enginepb.FieldSchema, v any) *enginepb.Cell {
        switch {
        case f == nil:
                return ValueToCell(v)

        case isRepeatedFieldMode(f.GetMode()):
                elems, isList := v.([]any)
                if !isList {
                        return ValueToCell(v)
                }
                elemSchema := repeatedElementSchema(f)
                cells := make([]*enginepb.Cell, len(elems))
                for i, elem := range elems {
                        cells[i] = cellFromJSONForField(elemSchema, elem)
                }
                array := &enginepb.Array{Elements: cells}
                return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: array}}

        case isStructFieldType(f.GetType()):
                obj, isObject := v.(map[string]any)
                if !isObject {
                        return ValueToCell(v)
                }
                subFields := f.GetFields()
                cells := make([]*enginepb.Cell, len(subFields))
                for i, sub := range subFields {
                        if subValue, present := obj[sub.GetName()]; present {
                                cells[i] = cellFromJSONForField(sub, subValue)
                        } else {
                                cells[i] = nullCell()
                        }
                }
                record := &enginepb.Struct{Fields: cells}
                return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: record}}

        default:
                return ValueToCell(v)
        }
}

// isRepeatedFieldMode reports whether mode equals bqModeRepeated,
// ignoring case and surrounding whitespace.
func isRepeatedFieldMode(mode string) bool {
        normalized := strings.TrimSpace(mode)
        return strings.EqualFold(normalized, bqModeRepeated)
}

// repeatedElementSchema derives the schema of a single element of a
// repeated field: a copy of f's name, type, description, and
// subfields with the mode left unset, so the element is not treated
// as repeated again. A nil f yields nil.
func repeatedElementSchema(f *enginepb.FieldSchema) *enginepb.FieldSchema {
        if f == nil {
                return nil
        }
        elem := new(enginepb.FieldSchema)
        elem.Name = f.GetName()
        elem.Type = f.GetType()
        elem.Description = f.GetDescription()
        elem.Fields = f.GetFields()
        return elem
}

// isStructFieldType reports whether t names a struct-like column
// type, i.e. equals bqTypeStruct or bqTypeRecord after trimming
// whitespace and upper-casing.
func isStructFieldType(t string) bool {
        normalized := strings.ToUpper(strings.TrimSpace(t))
        return normalized == bqTypeStruct || normalized == bqTypeRecord
}

// nullCell returns a freshly allocated Cell whose null_value is set.
func nullCell() *enginepb.Cell {
        null := &enginepb.Cell_NullValue{NullValue: true}
        return &enginepb.Cell{Value: null}
}

// ValueToCell converts a generic Go value into a proto Cell using the
// same conventions as gateway/handlers/tabledata.jsonToCell. Exported
// so the YAML loader and tests can reuse the conversion without
// reimplementing the (long, type-switch-heavy) logic.
//
// Conventions:
//   - nil          -> Cell.null_value = true
//   - bool         -> "true"/"false"
//   - json.Number  -> decimal string verbatim
//   - float64      -> integer text when the value is integral and
//     fits in int64, otherwise shortest 'g' formatting
//   - int/int64    -> decimal string
//   - string       -> string verbatim
//   - []byte       -> base64-encoded string (BYTES wire shape)
//   - []any        -> Array of converted cells
//   - map[string]any -> Struct of the converted values only; keys are
//     discarded and field order is Go map iteration order
//   - anything else -> fmt "%v" rendering as a string
//
// Do not use this helper for typed STRUCT or REPEATED columns: it
// ignores map keys and assigns values in Go map iteration order,
// which swaps named subfields (e.g. REPEATED STRUCT<key STRING,
// value JSON>). Always route STRUCT/REPEATED values through
// cellFromJSONForField with the table schema.
func ValueToCell(v any) *enginepb.Cell {
        if v == nil {
                return nullCell()
        }
        // Every scalar travels as a string cell; build them in one place.
        text := func(s string) *enginepb.Cell {
                return &enginepb.Cell{Value: &enginepb.Cell_StringValue{StringValue: s}}
        }
        switch val := v.(type) {
        case bool:
                return text(strconv.FormatBool(val))
        case json.Number:
                return text(string(val))
        case float64:
                // Only take the integer path when val is inside the int64
                // range [-2^63, 2^63). Converting an out-of-range float to
                // int64 is implementation-dependent in Go, so without the
                // guard a value such as 2^63 could be printed as a
                // different integer on platforms that saturate. NaN and
                // the infinities fail the range test and use FormatFloat.
                const int64Bound = 1 << 63
                if val >= -int64Bound && val < int64Bound && val == float64(int64(val)) {
                        return text(strconv.FormatInt(int64(val), 10))
                }
                return text(strconv.FormatFloat(val, 'g', -1, 64))
        case int:
                return text(strconv.Itoa(val))
        case int64:
                return text(strconv.FormatInt(val, 10))
        case string:
                return text(val)
        case []byte:
                return text(base64.StdEncoding.EncodeToString(val))
        case []any:
                arr := &enginepb.Array{Elements: make([]*enginepb.Cell, 0, len(val))}
                for _, el := range val {
                        arr.Elements = append(arr.Elements, ValueToCell(el))
                }
                return &enginepb.Cell{Value: &enginepb.Cell_Array{Array: arr}}
        case map[string]any:
                // Schema-blind: values only, keys discarded. See doc comment.
                st := &enginepb.Struct{Fields: make([]*enginepb.Cell, 0, len(val))}
                for _, fv := range val {
                        st.Fields = append(st.Fields, ValueToCell(fv))
                }
                return &enginepb.Cell{Value: &enginepb.Cell_StructValue{StructValue: st}}
        default:
                return text(fmt.Sprintf("%v", val))
        }
}

// Defaults captures the gateway-level fallback values seeding uses
// when callers (REST clients or YAML files) leave a project or
// dataset location empty. The gateway package builds one of these
// from its Options struct (see gateway/seed_runner.go) so the seed
// package itself never imports the gateway package and the two
// can stay free of import cycles.
type Defaults struct {
        ProjectID       string // fallback project when a caller leaves it empty
        DatasetLocation string // fallback dataset location when a caller leaves it empty
}

package seed

import (
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

// isAlreadyExists reports whether err is a gRPC ALREADY_EXISTS
// response from the engine. A nil error, or one that
// status.FromError does not recognise as a gRPC status, is never a
// match.
//
// Kept as a single helper so every "ensure" path in the applier
// shares one decision point; if the engine ever starts using a
// different code for the duplicate case (e.g. FAILED_PRECONDITION
// with a typed status detail) this is the one function to update.
func isAlreadyExists(err error) bool {
        if err == nil {
                return false
        }
        if st, isStatus := status.FromError(err); isStatus {
                return st.Code() == codes.AlreadyExists
        }
        return false
}

package seed

import (
        "context"
        "encoding/json"
        "errors"
        "io"
        "log" //nolint:depguard // matches the rest of the gateway package's existing log usage; slog migration is out of scope for this change
        "net/http"
        "strings"
)

// Status strings reused across the JSON error envelope responses.
// Pulled into named constants so the handlers don't repeat the same
// literals (and so a typo can't sneak past a grep).
const (
        statusInvalid        = "invalid"        // sent with 400 responses
        statusNotImplemented = "notImplemented" // sent with the 501 "no runner" response
        statusNotFound       = "notFound"       // sent with the 404 unknown-operation response
)

// Runner is the interface the HTTP handler uses to dispatch a
// validated SeedRequest to whoever actually copies production data
// into the emulator. The production orchestrator implements this;
// tests inject a fake that returns canned SeedResults so the
// handler can be exercised without a network round-trip.
type Runner interface {
        // Run performs the seed described by req. The handler records
        // a non-nil error as the operation's failure and otherwise
        // stores the returned result on the operation.
        Run(ctx context.Context, req SeedRequest) (*SeedResult, error)
}

// HandlerDeps bundles everything the seed handler set needs at
// registration time. Kept as a struct so the call from gateway/
// server.go stays readable.
type HandlerDeps struct {
        // Access enforces loopback / token gates. Per-process; the
        // gateway constructs one from gateway.Options.
        Access AccessConfig

        // Store is the per-process operation registry. The handler
        // creates one operation per POST and looks up the right one
        // on GET .../operations/{id}. RegisterRoutes substitutes a
        // fresh store when this is nil.
        Store *Store

        // Runner does the actual seeding work. Nil means the build
        // does not include a production runner; the POST handler
        // surfaces 501 NotImplemented with a documented reason so
        // operators can tell "the route is wired" from "the build
        // can't help me".
        Runner Runner
}

// RegisterRoutes installs `POST /api/emulator/seed` and
// `GET /api/emulator/seed/operations/{operationId}` on mux. When
// deps.Store is nil a fresh store from NewStore is used.
//
// Call it once per mux (gateway.NewServer does so when EnableSeedAPI
// is true): http.ServeMux panics if the same pattern is registered a
// second time.
func RegisterRoutes(mux *http.ServeMux, deps HandlerDeps) {
        if deps.Store == nil {
                deps.Store = NewStore()
        }
        // The method values below capture deps after the store default
        // has been applied, so both handlers share the same registry.
        post, get := deps.handlePost, deps.handleGet
        mux.HandleFunc("POST /api/emulator/seed", post)
        mux.HandleFunc("GET /api/emulator/seed/operations/{operationId}", get)
}

// handlePost accepts a SeedRequest, validates it, mints a new
// operation in the store, and kicks off the actual seeding work on a
// background goroutine. The 202 response is the freshly-minted
// operation envelope; callers poll the GET endpoint for completion.
//
// Failure responses: an access denial goes through writeAccessError;
// an unreadable body, a decode error, or a validation error yields
// 400 "invalid"; a nil Runner yields 501 "notImplemented" and no
// operation is created.
func (d HandlerDeps) handlePost(w http.ResponseWriter, r *http.Request) {
        if err := d.Access.CheckAccess(r); err != nil {
                writeAccessError(w, err)
                return
        }

        // All three request-level failures share the same 400 envelope.
        reject := func(message string) {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code:    http.StatusBadRequest,
                        Status:  statusInvalid,
                        Message: message,
                })
        }

        raw, err := io.ReadAll(r.Body)
        if err != nil {
                reject("Could not read seed request body: " + err.Error())
                return
        }
        req, err := DecodeRequest(raw)
        if err != nil {
                reject(err.Error())
                return
        }
        if err := req.Validate(); err != nil {
                reject(err.Error())
                return
        }

        if d.Runner == nil {
                // The route exists but the build does not include a
                // production runner. Surface a 501 with the documented
                // reason rather than a 200 with an empty Result.
                writeJSON(w, http.StatusNotImplemented, errEnvelope{
                        Code:   http.StatusNotImplemented,
                        Status: statusNotImplemented,
                        Message: "Production seed is not compiled into this gateway build. " +
                                "Use --seed-data-file for declarative seeding.",
                })
                return
        }

        op := d.Store.New(req)
        go d.runOperation(op.ID, req)
        writeJSON(w, http.StatusAccepted, operationToWire(op))
}

// handleGet returns the current snapshot of an operation as a 200
// response. An id the store does not know yields a 404 with the
// "notFound" error envelope; an access denial goes through
// writeAccessError.
func (d HandlerDeps) handleGet(w http.ResponseWriter, r *http.Request) {
        if err := d.Access.CheckAccess(r); err != nil {
                writeAccessError(w, err)
                return
        }
        id := r.PathValue("operationId")
        if op := d.Store.Get(id); op != nil {
                writeJSON(w, http.StatusOK, operationToWire(op))
                return
        }
        writeJSON(w, http.StatusNotFound, errEnvelope{
                Code:    http.StatusNotFound,
                Status:  statusNotFound,
                Message: "No such seed operation: " + id,
        })
}

// runOperation moves an operation through RUNNING -> DONE/FAILED in
// the background. It does nothing when the store refuses to mark the
// operation as running.
//
// The runner gets a fresh context.Background so the HTTP request
// that posted the operation can complete (its context goes away)
// without cancelling the seed work; long operations are the norm.
// A runner error is logged and recorded on the operation as its
// failure message.
func (d HandlerDeps) runOperation(id string, req SeedRequest) {
        if started := d.Store.MarkRunning(id); !started {
                return
        }
        result, err := d.Runner.Run(context.Background(), req)
        if err == nil {
                d.Store.MarkResult(id, result)
                return
        }
        log.Printf("seed: operation %s failed: %v", id, err)
        d.Store.MarkFailed(id, err.Error())
}

// operationWire is the JSON envelope the polling endpoint serves
// for one operation. It is produced from the in-memory Operation by
// operationToWire and kept as a separate type so the wire format
// (string timestamps, omitted empty fields) is decoupled from the
// Go-side representation.
type operationWire struct {
        // ID is the operation's opaque identifier.
        ID        string         `json:"id"`
        // State is the lifecycle state at snapshot time.
        State     OperationState `json:"state"`
        // Started is the UTC start time, formatted by operationToWire.
        Started   string         `json:"started"`
        // Finished is the UTC completion time; omitted while unset.
        Finished  string         `json:"finished,omitempty"`
        // Request echoes the seed request that created the operation.
        Request   SeedRequest    `json:"request"`
        // Result is the seed outcome; omitted while nil.
        Result    *SeedResult    `json:"result,omitempty"`
        // Error carries Operation.FatalErr; omitted when empty.
        Error     string         `json:"error,omitempty"`
        // Cancelled mirrors Operation.Cancelled; omitted when false.
        Cancelled bool           `json:"cancelled,omitempty"`
}

// operationToWire converts an in-memory Operation into the JSON
// envelope served by the polling endpoint. Timestamps are rendered
// in UTC with second precision; Finished stays empty (and is
// therefore omitted from the JSON) while the operation has no
// completion time.
func operationToWire(op *Operation) operationWire {
        const wireLayout = "2006-01-02T15:04:05Z"

        finished := ""
        if !op.Finished.IsZero() {
                finished = op.Finished.UTC().Format(wireLayout)
        }
        return operationWire{
                ID:        op.ID,
                State:     op.State,
                Started:   op.Started.UTC().Format(wireLayout),
                Finished:  finished,
                Request:   op.Request,
                Result:    op.Result,
                Error:     op.FatalErr,
                Cancelled: op.Cancelled,
        }
}

// errEnvelope mirrors the BigQuery-shaped error response the rest
// of the gateway uses (gateway/handlers/handlers.go). Duplicating
// the shape here keeps the seed package from importing the public
// handlers package and creating an import cycle.
type errEnvelope struct {
        // Code is the HTTP status code, repeated in the body.
        Code    int    `json:"code"`
        // Status is a short machine-readable reason string.
        Status  string `json:"status"`
        // Message is the human-readable explanation.
        Message string `json:"message"`
}

// writeJSON sends body as a JSON response with the given HTTP
// status and a "application/json; charset=utf-8" content type.
//
// The status line is written before the body is encoded, so an
// encode or write failure can no longer be reported to the client.
// Rather than dropping it silently, the failure is logged so a
// truncated or empty response can be diagnosed.
func writeJSON(w http.ResponseWriter, status int, body any) {
        w.Header().Set("Content-Type", "application/json; charset=utf-8")
        w.WriteHeader(status)
        if err := json.NewEncoder(w).Encode(body); err != nil {
                log.Printf("seed: writing JSON response (status %d): %v", status, err)
        }
}

// writeAccessError renders an access-check failure using the
// gateway's error envelope. The response status is 403 unless err
// (or an error it wraps) is an httpError, in which case that
// error's Status() is used instead. The envelope's status string is
// always "accessDenied" and the message is err's text with
// surrounding whitespace trimmed.
func writeAccessError(w http.ResponseWriter, err error) {
        status := http.StatusForbidden
        var coded httpError
        if errors.As(err, &coded) {
                status = coded.Status()
        }
        writeJSON(w, status, errEnvelope{
                Code:    status,
                Status:  "accessDenied",
                Message: strings.TrimSpace(err.Error()),
        })
}

package seed

import (
        "context"
        "errors"
        "fmt"
        "strings"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// ProductionReader is the narrow surface the orchestrator drives
// against the live production BigQuery side. The concrete
// implementation (gateway/seed/production_live.go, gated behind the
// `seed_production_live` build tag) wraps cloud.google.com/go/bigquery
// so the orchestrator package itself never pulls in the heavy cloud
// dependency tree unless the operator explicitly opts in.
//
// Tests stub this interface to feed deterministic metadata and rows
// without a network round-trip.
type ProductionReader interface {
        // ListDatasets enumerates the datasets in `projectID` that
        // are visible to the calling principal. Implementations may
        // stream results (cloud.google.com/go/bigquery's iterator does)
        // but the gateway-side API stays slice-shaped so the test
        // fakes are easy to write.
        ListDatasets(ctx context.Context, projectID string) ([]ProductionDataset, error)

        // ListTables enumerates tables, views, and external tables
        // inside `projectID.datasetID`. Each entry carries enough
        // metadata (its Type) for the orchestrator to decide whether
        // the table is supported (physical) or needs a
        // SeedResourceError (view, model, ...).
        ListTables(ctx context.Context, projectID, datasetID string) ([]ProductionTable, error)

        // DescribeTable returns the physical schema for one table.
        // The orchestrator only calls it for entries that passed its
        // table-type check (Type "TABLE", compared case-insensitively,
        // or an empty Type).
        DescribeTable(ctx context.Context, projectID, datasetID, tableID string) (*enginepb.TableSchema, error)

        // ReadRows pages through one table's rows. The orchestrator
        // passes the request's MaxRowsPerTable knob as maxRows to cap
        // the read (zero is documented on SeedRequest as "no limit");
        // an implementation may stop early. Each row is a map keyed
        // by string; NOTE(review): presumably column name -> value,
        // confirm against the live adapter.
        ReadRows(ctx context.Context, projectID, datasetID, tableID string, maxRows int64) ([]map[string]any, error)
}

// ProductionDataset is the slimmed-down view of a BigQuery dataset
// the orchestrator needs. The fields map 1:1 to the dataset
// resource in cloud.google.com/go/bigquery so the live adapter is
// a thin lift.
type ProductionDataset struct {
        // ProjectID is the project that owns the dataset.
        ProjectID string
        // DatasetID is the dataset's id; the orchestrator matches
        // and mirrors datasets by this value.
        DatasetID string
        // Location is carried over to the destination dataset. When
        // empty the orchestrator falls back to
        // Defaults.DatasetLocation.
        Location  string
}

// ProductionTable carries the fields the orchestrator reads off
// the cloud client's TableMetadata.
type ProductionTable struct {
        // ProjectID and DatasetID locate the table's parent dataset.
        ProjectID string
        DatasetID string
        // TableID is the table's id; the orchestrator matches source
        // tables by this value.
        TableID   string
        // Type is the cloud library's table-type string ("TABLE",
        // "VIEW", "MATERIALIZED_VIEW", "EXTERNAL", "MODEL", ...).
        // "TABLE" (compared case-insensitively) and the empty string
        // are treated as physical tables; anything else lands in
        // ResourceErrors with kind=unsupported until the engine
        // learns to persist them.
        Type string
}

// Orchestrator wires a ProductionReader to an Applier and runs one
// SeedRequest through them. It is the production-side
// implementation of Runner.
type Orchestrator struct {
        // Reader is the production-side source of metadata and rows.
        Reader   ProductionReader
        // Applier writes datasets, tables and rows into the emulator.
        Applier  Applier
        // Defaults supplies fallbacks such as the dataset location
        // and the default project used for billing resolution.
        Defaults Defaults

        // EnvLookup is consulted when ResolveBillingProject walks
        // the env fallback chain. NewOrchestrator sets it to
        // LookupEnvOrEmpty; when the field is left nil (struct
        // literal construction) the orchestrator substitutes a
        // lookup that reports every variable as unset.
        EnvLookup func(string) (string, bool)
}

// NewOrchestrator builds an Orchestrator from its collaborators and
// installs LookupEnvOrEmpty as the environment lookup.
//
// Both reader and applier must be non-nil. The constructor panics
// on a nil argument because the alternative is a confusing
// "method on nil receiver" failure deep inside Run.
func NewOrchestrator(reader ProductionReader, applier Applier, defaults Defaults) *Orchestrator {
        switch {
        case reader == nil:
                panic("seed: NewOrchestrator: reader must be non-nil; pass NewProductionReader or a test stub")
        case applier == nil:
                panic("seed: NewOrchestrator: applier must be non-nil")
        }
        orch := new(Orchestrator)
        orch.Reader = reader
        orch.Applier = applier
        orch.Defaults = defaults
        orch.EnvLookup = LookupEnvOrEmpty
        return orch
}

// Run executes one SeedRequest at the scope its Source selects:
// a single table when Source.Table is set, one dataset when only
// Source.Dataset is set, and the whole project otherwise.
//
// The request is re-validated here so a malformed request from a
// non-HTTP caller surfaces as a validation error rather than a
// panic; that validation error is the only error Run returns.
// Every failure encountered while seeding (list, read or write
// problems, unsupported table types) is accumulated in
// Result.ResourceErrors, and the returned SeedResult is non-nil
// with Started and Finished stamped.
func (o *Orchestrator) Run(ctx context.Context, req SeedRequest) (*SeedResult, error) {
        if err := req.Validate(); err != nil {
                return nil, err
        }
        result := &SeedResult{Started: nowRFC3339()}

        // The billing project is consumed by the live reader, not
        // here. Resolving it anyway exercises the fallback chain even
        // when tests bypass the live adapter; the value is discarded.
        _ = ResolveBillingProject(req, o.Defaults.ProjectID, o.envLookup())

        src, dest := req.Source, destinationOf(req)
        if src.Table != "" {
                o.seedTable(ctx, src.Project, src.Dataset, src.Table,
                        dest.Project, dest.Dataset, dest.Table,
                        req.MaxRowsPerTable, result)
        } else if src.Dataset != "" {
                o.seedDataset(ctx, src.Project, src.Dataset,
                        dest.Project, dest.Dataset, req.MaxRowsPerTable, result)
        } else {
                o.seedProject(ctx, src.Project, dest.Project, req.MaxRowsPerTable, result)
        }

        result.Finished = nowRFC3339()
        return result, nil
}

// envLookup yields the lookup function Run should use: the
// configured EnvLookup when present, otherwise a stand-in that
// reports every variable as unset. Keeping the nil check here
// keeps Run readable.
func (o *Orchestrator) envLookup() func(string) (string, bool) {
        lookup := o.EnvLookup
        if lookup == nil {
                lookup = func(string) (string, bool) { return "", false }
        }
        return lookup
}

// seedProject copies every dataset under sourceProject into
// destProject, mirroring each dataset id. A ListDatasets failure is
// recorded as one rpc ResourceError and ends the walk; per-dataset
// failures fold into ResourceErrors.
func (o *Orchestrator) seedProject(
        ctx context.Context,
        sourceProject, destProject string,
        maxRows int64,
        result *SeedResult,
) {
        datasets, err := o.Reader.ListDatasets(ctx, sourceProject)
        if err != nil {
                result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                        Resource: "project:" + sourceProject,
                        Kind:     resourceKindRPC,
                        Error:    fmt.Sprintf("ListDatasets: %v", err),
                })
                return
        }
        for _, ds := range datasets {
                // The listing already carries each dataset's location;
                // hand it down instead of re-listing the project once
                // per dataset.
                o.seedDatasetAt(ctx, sourceProject, ds.DatasetID, destProject, ds.DatasetID,
                        ds.Location, maxRows, result)
        }
}

// seedDataset copies one source dataset into the destination
// project + dataset name. Caller is responsible for choosing the
// destination dataset id (mirror or override); an empty destDataset
// mirrors sourceDataset. The source dataset's location is looked up
// on a best-effort basis and falls back to Defaults.DatasetLocation.
func (o *Orchestrator) seedDataset(
        ctx context.Context,
        sourceProject, sourceDataset, destProject, destDataset string,
        maxRows int64,
        result *SeedResult,
) {
        location := o.lookupDatasetLocation(ctx, sourceProject, sourceDataset)
        o.seedDatasetAt(ctx, sourceProject, sourceDataset, destProject, destDataset,
                location, maxRows, result)
}

// lookupDatasetLocation returns the location ListDatasets reports
// for sourceDataset, or "" when the dataset is not listed.
func (o *Orchestrator) lookupDatasetLocation(
        ctx context.Context,
        sourceProject, sourceDataset string,
) string {
        // Best effort by design: a listing error only costs us the
        // location hint (the caller falls back to the default), so it
        // is intentionally not reported as a ResourceError.
        datasets, _ := o.Reader.ListDatasets(ctx, sourceProject)
        for _, d := range datasets {
                if d.DatasetID == sourceDataset {
                        return d.Location
                }
        }
        return ""
}

// seedDatasetAt ensures the destination dataset exists at location
// (Defaults.DatasetLocation when empty) and seeds every table the
// source dataset lists.
func (o *Orchestrator) seedDatasetAt(
        ctx context.Context,
        sourceProject, sourceDataset, destProject, destDataset, location string,
        maxRows int64,
        result *SeedResult,
) {
        if destDataset == "" {
                destDataset = sourceDataset
        }
        if location == "" {
                location = o.Defaults.DatasetLocation
        }
        created, err := o.Applier.EnsureDataset(ctx, destProject, destDataset, location)
        if err != nil {
                result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                        Resource: fmt.Sprintf("dataset:%s.%s", destProject, destDataset),
                        Kind:     resourceKindWrite,
                        Error:    err.Error(),
                })
                return
        }
        if created {
                result.DatasetsCreated++
        } else {
                result.DatasetsSkipped++
        }

        tables, err := o.Reader.ListTables(ctx, sourceProject, sourceDataset)
        if err != nil {
                result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                        Resource: fmt.Sprintf("dataset:%s.%s", sourceProject, sourceDataset),
                        Kind:     resourceKindRPC,
                        Error:    fmt.Sprintf("ListTables: %v", err),
                })
                return
        }
        for i := range tables {
                // The entries come straight from the listing, so the type
                // check runs on them directly rather than issuing another
                // ListTables per table.
                if !sourceTableSupported(&tables[i], sourceProject, sourceDataset, result) {
                        continue
                }
                o.seedResolvedTable(ctx, sourceProject, sourceDataset, tables[i].TableID,
                        destProject, destDataset, tables[i].TableID, maxRows, result)
        }
}

// seedTable copies a single source table into the destination.
// Unsupported source types (views, materialized views, models,
// external) are reported as ResourceErrors without aborting the
// surrounding seed. An empty destTable mirrors sourceTable.
//
// The work is split across resolveSourceTable and
// seedResolvedTable; each step short-circuits by appending to
// result.ResourceErrors.
func (o *Orchestrator) seedTable(
        ctx context.Context,
        sourceProject, sourceDataset, sourceTable, destProject, destDataset, destTable string,
        maxRows int64,
        result *SeedResult,
) {
        if destTable == "" {
                destTable = sourceTable
        }
        if _, ok := o.resolveSourceTable(ctx, sourceProject, sourceDataset, sourceTable, result); !ok {
                return
        }
        o.seedResolvedTable(ctx, sourceProject, sourceDataset, sourceTable,
                destProject, destDataset, destTable, maxRows, result)
}

// seedResolvedTable copies schema and rows for a source table that
// has already passed the table-type check.
func (o *Orchestrator) seedResolvedTable(
        ctx context.Context,
        sourceProject, sourceDataset, sourceTable, destProject, destDataset, destTable string,
        maxRows int64,
        result *SeedResult,
) {
        schema, ok := o.describeSourceSchema(ctx, sourceProject, sourceDataset, sourceTable, result)
        if !ok {
                return
        }
        ref := TableRef{ProjectID: destProject, DatasetID: destDataset, TableID: destTable}
        if !o.writeTableMetadata(ctx, ref, schema, result) {
                return
        }
        o.copyTableRows(ctx, sourceProject, sourceDataset, sourceTable, ref, schema, maxRows, result)
}

// resolveSourceTable looks up the source table's metadata and
// returns it when it's a supported (physical TABLE) entry.
// A listing failure, a missing table, or an unsupported type folds
// into a ResourceError and ok=false.
func (o *Orchestrator) resolveSourceTable(
        ctx context.Context,
        sourceProject, sourceDataset, sourceTable string,
        result *SeedResult,
) (*ProductionTable, bool) {
        tables, err := o.Reader.ListTables(ctx, sourceProject, sourceDataset)
        if err != nil {
                result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                        Resource: fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, sourceTable),
                        Kind:     resourceKindRPC,
                        Error:    fmt.Sprintf("ListTables: %v", err),
                })
                return nil, false
        }
        var match *ProductionTable
        for i := range tables {
                if tables[i].TableID == sourceTable {
                        match = &tables[i]
                        break
                }
        }
        if match == nil {
                result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                        Resource: fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, sourceTable),
                        Kind:     resourceKindRead,
                        Error:    "table not found",
                })
                return nil, false
        }
        if !sourceTableSupported(match, sourceProject, sourceDataset, result) {
                return nil, false
        }
        return match, true
}

// sourceTableSupported reports whether tbl is a physical table the
// emulator can seed: Type "TABLE" (case-insensitive) or an empty
// Type. For any other type it appends an "unsupported"
// ResourceError and returns false.
func sourceTableSupported(
        tbl *ProductionTable,
        sourceProject, sourceDataset string,
        result *SeedResult,
) bool {
        if tbl.Type == "" || strings.EqualFold(tbl.Type, "TABLE") {
                return true
        }
        result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                Resource: fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, tbl.TableID),
                Kind:     resourceKindUnsupported,
                Error: fmt.Sprintf(
                        "type %q is not yet supported by the BigQuery emulator; only physical TABLE entries are seeded",
                        tbl.Type,
                ),
        })
        return false
}

// describeSourceSchema asks the reader for the source table's
// schema. On failure it appends a "read" ResourceError naming the
// source table and returns (nil, false).
func (o *Orchestrator) describeSourceSchema(
        ctx context.Context,
        sourceProject, sourceDataset, sourceTable string,
        result *SeedResult,
) (*enginepb.TableSchema, bool) {
        schema, descErr := o.Reader.DescribeTable(ctx, sourceProject, sourceDataset, sourceTable)
        if descErr == nil {
                return schema, true
        }
        failure := SeedResourceError{
                Resource: fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, sourceTable),
                Kind:     resourceKindRead,
                Error:    fmt.Sprintf("DescribeTable: %v", descErr),
        }
        result.ResourceErrors = append(result.ResourceErrors, failure)
        return nil, false
}

// writeTableMetadata ensures the destination table exists and
// updates the counters: TablesCreated when the applier reports a
// new table, TablesSkipped otherwise. When EnsureTable fails, a
// "write" ResourceError for the destination table is appended and
// false is returned.
func (o *Orchestrator) writeTableMetadata(
        ctx context.Context,
        ref TableRef,
        schema *enginepb.TableSchema,
        result *SeedResult,
) bool {
        created, ensureErr := o.Applier.EnsureTable(ctx, ref, schema)
        switch {
        case ensureErr != nil:
                result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                        Resource: fmt.Sprintf("table:%s.%s.%s", ref.ProjectID, ref.DatasetID, ref.TableID),
                        Kind:     resourceKindWrite,
                        Error:    ensureErr.Error(),
                })
                return false
        case created:
                result.TablesCreated++
        default:
                result.TablesSkipped++
        }
        return true
}

// copyTableRows reads rows from the source table (passing maxRows
// through to the reader) and inserts them into the destination.
// A read failure is recorded as a "read" ResourceError against the
// source table, an insert failure as a "write" ResourceError
// against the destination. An empty read skips the InsertRows call;
// on success the applier's row count is added to RowsCopied.
func (o *Orchestrator) copyTableRows(
        ctx context.Context,
        sourceProject, sourceDataset, sourceTable string,
        ref TableRef,
        schema *enginepb.TableSchema,
        maxRows int64,
        result *SeedResult,
) {
        rows, readErr := o.Reader.ReadRows(ctx, sourceProject, sourceDataset, sourceTable, maxRows)
        switch {
        case readErr != nil:
                result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                        Resource: fmt.Sprintf("table:%s.%s.%s", sourceProject, sourceDataset, sourceTable),
                        Kind:     resourceKindRead,
                        Error:    fmt.Sprintf("ReadRows: %v", readErr),
                })
                return
        case len(rows) == 0:
                // Nothing to insert.
                return
        }

        inserted, writeErr := o.Applier.InsertRows(ctx, ref, schema, rows)
        if writeErr == nil {
                result.RowsCopied += int64(inserted)
                return
        }
        result.ResourceErrors = append(result.ResourceErrors, SeedResourceError{
                Resource: fmt.Sprintf("table:%s.%s.%s", ref.ProjectID, ref.DatasetID, ref.TableID),
                Kind:     resourceKindWrite,
                Error:    writeErr.Error(),
        })
}

// Resource error classification strings, used as
// SeedResourceError.Kind. Pulled into named constants so the
// orchestrator and tests both reference one source of truth, and
// so a future rename of a kind value happens in one place.
const (
        // resourceKindRPC marks a failed listing call (ListDatasets,
        // ListTables).
        resourceKindRPC         = "rpc"
        // resourceKindRead marks a failure reading one source table
        // (table not found, DescribeTable, ReadRows).
        resourceKindRead        = "read"
        // resourceKindWrite marks a failure writing to the emulator
        // (EnsureDataset, EnsureTable, InsertRows).
        resourceKindWrite       = "write"
        // resourceKindUnsupported marks a source entry whose type is
        // not a physical table.
        resourceKindUnsupported = "unsupported"
)

// destinationOf resolves the (project, dataset, table) triple the
// seed helpers write to. It starts from the source ids and, when
// the request carries a Destination, replaces each field whose
// override is non-empty after trimming whitespace; the trimmed
// value is what gets used.
func destinationOf(req SeedRequest) struct{ Project, Dataset, Table string } {
        var dest struct{ Project, Dataset, Table string }
        dest.Project, dest.Dataset, dest.Table = req.Source.Project, req.Source.Dataset, req.Source.Table

        override := req.Destination
        if override == nil {
                return dest
        }
        remaps := []struct {
                target *string
                value  string
        }{
                {&dest.Project, override.Project},
                {&dest.Dataset, override.Dataset},
                {&dest.Table, override.Table},
        }
        for _, remap := range remaps {
                if trimmed := strings.TrimSpace(remap.value); trimmed != "" {
                        *remap.target = trimmed
                }
        }
        return dest
}

// NewProductionReader is the constructor the gateway calls when the
// operator enables the seed API. The default build returns a nil
// reader and an error that matches ErrProductionUnsupported under
// errors.Is; building with `-tags=seed_production_live` swaps in
// the cloud.google.com/go/bigquery-backed implementation
// (gateway/seed/production_live.go).
//
// Callers that just want the YAML loader (gateway/seedfile) don't
// need to call this and don't pay for the heavy cloud deps.
//
// The unused parameter list is intentional: it matches the
// signature of the live implementation, which needs the project to
// set ADC quota and the env lookup to derive fallbacks for the
// production client.
func NewProductionReader(
        ctx context.Context,
        billingProject string,
        getenv func(string) (string, bool),
) (ProductionReader, error) {
        // Wrap the sentinel instead of copying its text into a fresh
        // error: the message is unchanged, and callers can detect the
        // unsupported build with errors.Is(err, ErrProductionUnsupported).
        return nil, errors.Join(ErrProductionUnsupported)
}

package seed

import (
        "crypto/rand"
        "encoding/hex"
        "sync"
        "time"
)

// OperationState enumerates the lifecycle states a seed operation
// moves through. The string values are what appears in the
// operation's JSON "state" field.
//
//        Pending  -> Running -> Done | Failed
type OperationState string

const (
        // OperationPending is the state Store.New assigns.
        OperationPending OperationState = "PENDING"
        // OperationRunning is set by Store.MarkRunning.
        OperationRunning OperationState = "RUNNING"
        // OperationDone is set by Store.MarkResult.
        OperationDone    OperationState = "DONE"
        // OperationFailed is set by Store.MarkFailed.
        OperationFailed  OperationState = "FAILED"
)

// Operation is the persisted view of an in-flight or completed seed
// operation. The HTTP handler converts this into the wire-shape
// the polling endpoint serves; keeping it as a Go struct (rather
// than a raw map) makes the store's tests easier to read.
//
// Started/Finished use time.Time even though only Finished can be
// zero-valued (Started is always stamped on New). The wire-shape
// formatting -- including omitting an empty Finished -- happens in
// operationToWire over in handler.go, so this struct itself is
// purely the in-memory view and the json tags are minimal.
type Operation struct {
        // ID is the opaque identifier minted by the store's idGen.
        ID        string         `json:"id"`
        // State is the current lifecycle state.
        State     OperationState `json:"state"`
        // Started is stamped (UTC) when the operation is registered.
        Started   time.Time      `json:"started"`
        // Finished is stamped (UTC) by MarkResult / MarkFailed and
        // is the zero time until then.
        Finished  time.Time      `json:"finished"`
        // Request is the seed request that created the operation.
        Request   SeedRequest    `json:"request"`
        // Result is set by MarkResult; nil otherwise.
        Result    *SeedResult    `json:"result,omitempty"`
        // FatalErr is the message recorded by MarkFailed.
        FatalErr  string         `json:"error,omitempty"`
        // Cancelled is copied to the wire envelope.
        // NOTE(review): none of the store methods in this file set
        // it -- confirm where (or whether) it is written.
        Cancelled bool           `json:"cancelled,omitempty"`
}

// Store holds the per-process operation registry. The HTTP handler
// stores newly-minted operations here and reads them back on poll;
// the handler's background runner drives state transitions via
// Mark*. The store is in-memory; restarting the gateway forgets
// every operation, which is consistent with the rest of the
// emulator's lifecycle.
type Store struct {
        // mu guards ops; every method holds it for its whole body.
        mu  sync.Mutex
        // ops maps operation ID to the store's own copy of the
        // operation. Callers only ever receive clones.
        ops map[string]*Operation

        // idGen mints opaque operation IDs. Pulled out into a func
        // so tests can pin "operationN" instead of random hex.
        idGen func() string
}

// NewStore returns an empty operation registry whose IDs come from
// newRandomID. Each gateway process owns one; the seed handler
// closes over it.
func NewStore() *Store {
        store := new(Store)
        store.ops = map[string]*Operation{}
        store.idGen = newRandomID
        return store
}

// New registers a fresh operation for req in PENDING state, with a
// newly minted ID and the current UTC time as Started, and returns
// a snapshot copy of it. The orchestrator side moves it to RUNNING
// via MarkRunning when it picks the operation up.
func (s *Store) New(req SeedRequest) *Operation {
        s.mu.Lock()
        defer s.mu.Unlock()

        id := s.idGen()
        s.ops[id] = &Operation{
                ID:      id,
                State:   OperationPending,
                Started: time.Now().UTC(),
                Request: req,
        }
        return cloneOperation(s.ops[id])
}

// Get looks up the operation registered under id and returns a
// snapshot copy of it, or nil when the id is unknown.
func (s *Store) Get(id string) *Operation {
        s.mu.Lock()
        defer s.mu.Unlock()

        if stored, found := s.ops[id]; found {
                return cloneOperation(stored)
        }
        return nil
}

// MarkRunning moves a PENDING operation to RUNNING and reports
// whether the transition happened. It returns false, changing
// nothing, when the id is unknown or the operation has already
// left PENDING.
func (s *Store) MarkRunning(id string) bool {
        s.mu.Lock()
        defer s.mu.Unlock()

        if stored, found := s.ops[id]; found && stored.State == OperationPending {
                stored.State = OperationRunning
                return true
        }
        return false
}

// MarkResult records a successful (or partially-successful)
// completion: the operation becomes DONE, keeps the given result,
// and gets its Finished timestamp stamped with the current UTC
// time. Returns false when the id is unknown.
func (s *Store) MarkResult(id string, result *SeedResult) bool {
        s.mu.Lock()
        defer s.mu.Unlock()

        stored, found := s.ops[id]
        if found {
                stored.State = OperationDone
                stored.Result = result
                stored.Finished = time.Now().UTC()
        }
        return found
}

// MarkFailed records a catastrophic failure: the operation becomes
// FAILED, errMsg is stored as its fatal error, and Finished is
// stamped with the current UTC time. Returns false when the id is
// unknown. Per-resource failures belong in Result.ResourceErrors
// and should be reported through MarkResult instead.
func (s *Store) MarkFailed(id, errMsg string) bool {
        s.mu.Lock()
        defer s.mu.Unlock()

        stored, found := s.ops[id]
        if found {
                stored.State = OperationFailed
                stored.FatalErr = errMsg
                stored.Finished = time.Now().UTC()
        }
        return found
}

// cloneOperation hands back a shallow copy of op so callers can
// read or modify their snapshot without touching the store's own
// record. Pointer fields (such as Result) are shared with the
// original; that is acceptable because a Result is never mutated
// after it has been passed through MarkResult.
func cloneOperation(op *Operation) *Operation {
        snapshot := new(Operation)
        *snapshot = *op
        return snapshot
}

// newRandomID mints an operation ID of the form "op-" followed by
// 16 lowercase hex characters (8 random bytes). 64 bits of entropy
// is plenty -- the seed store is in-memory and never federated, so
// collision risk is bounded by the lifetime of one gateway process.
//
// If the random source reports an error, the fixed, clearly
// debug-able ID "op-rand-error" is returned rather than panicking.
func newRandomID() string {
        entropy := make([]byte, 8)
        _, err := rand.Read(entropy)
        if err == nil {
                return "op-" + hex.EncodeToString(entropy)
        }
        return "op-rand-error"
}

package seed

import (
        "encoding/json"
        "errors"
        "fmt"
        "os"
        "strings"
        "time"
)

// SeedRequest is the JSON body the seed API accepts on
// `POST /api/emulator/seed`. The contract is documented in
// docs/SEEDING.md so operators with existing seed tooling can
// reuse request bodies without changes.
type SeedRequest struct {
        // Source is the production-side resource the seeder reads
        // from. Required.
        Source SeedEndpointRef `json:"source"`

        // Destination is the emulator-side resource the seeder writes
        // into. When omitted, the seeder mirrors Source 1:1 (same
        // project/dataset/table ids on this emulator).
        Destination *SeedDestinationRef `json:"destination,omitempty"`

        // MaxRowsPerTable bounds the number of rows the seeder will
        // copy from any single source table. Zero means "no limit";
        // negative values are rejected by Validate. This is the
        // dominant safety knob -- operators trying to mirror
        // billion-row tables into a local emulator should set this
        // aggressively.
        MaxRowsPerTable int64 `json:"maxRowsPerTable,omitempty"`

        // BillingProject is the GCP project that the BigQuery jobs
        // issued by the production read are billed against. When
        // omitted the seeder falls back through the documented chain
        // (see ResolveBillingProject).
        BillingProject string `json:"billingProject,omitempty"`
}

// SeedEndpointRef names a production resource. Either Project (full
// project scope), Project+Dataset (dataset scope), or
// Project+Dataset+Table (single-table scope) is supported. Source
// requests with no Project are rejected up front -- BigQuery has no
// well-defined notion of "default project" on the wire.
type SeedEndpointRef struct {
        // Project is required; Validate rejects a blank value.
        Project string `json:"project"`
        // Dataset narrows the scope to one dataset when set.
        Dataset string `json:"dataset,omitempty"`
        // Table narrows the scope to one table; it requires Dataset.
        Table   string `json:"table,omitempty"`
}

// SeedDestinationRef is the same shape as SeedEndpointRef but every
// field is an optional remapping. A field that is omitted (or blank
// after trimming) mirrors the corresponding source id verbatim;
// when set, the seeder copies
// `Source.Project.Source.Dataset.Source.Table` into
// `Destination.Project.Destination.Dataset.Destination.Table`.
type SeedDestinationRef struct {
        // Project overrides the destination project.
        Project string `json:"project,omitempty"`
        // Dataset overrides the destination dataset; Validate only
        // allows it when the source names a dataset.
        Dataset string `json:"dataset,omitempty"`
        // Table overrides the destination table; Validate only
        // allows it together with Dataset and a source table.
        Table   string `json:"table,omitempty"`
}

// SeedResult is what we report back to the caller once an operation
// finishes. Counters follow the stable seed API shape documented in
// docs/SEEDING.md so dashboards and scripts can read responses
// without changes.
type SeedResult struct {
        // Started / Finished are RFC 3339 timestamps.
        Started  string `json:"started"`
        Finished string `json:"finished,omitempty"`

        // DatasetsCreated counts datasets the seeder added to the
        // emulator on this run. Idempotent reruns surface 0 here and
        // a positive DatasetsSkipped.
        DatasetsCreated int `json:"datasetsCreated"`
        DatasetsSkipped int `json:"datasetsSkipped"`

        // TablesCreated / TablesSkipped are the same shape for
        // physical-table resources. Views, materialized views,
        // external tables, and routines all fold into
        // ResourceErrors (one entry per unsupported resource) for
        // the initial integration; see ROADMAP for the support
        // matrix.
        TablesCreated int `json:"tablesCreated"`
        TablesSkipped int `json:"tablesSkipped"`

        // RowsCopied is the total number of rows the seeder
        // successfully inserted into the emulator, summed across
        // every destination table this operation touched.
        RowsCopied int64 `json:"rowsCopied"`

        // ResourceErrors holds per-resource failures. The operation
        // itself can still finish "DONE" while individual tables
        // failed -- partial-failure data is returned without forcing
        // the caller to retry the entire scope.
        ResourceErrors []SeedResourceError `json:"resourceErrors,omitempty"`
}

// SeedResourceError captures a per-resource failure (one table, one
// view, one routine, ...). These entries are reported inside
// SeedResult.ResourceErrors; the polling envelope's top-level error
// field carries only the operation's fatal error message. The
// operation as a whole only fails when something catastrophic
// happens (the production project is unreachable, ADC credentials
// are missing, etc).
type SeedResourceError struct {
        // Resource is a human-readable identifier
        // ("dataset:proj.ds", "table:proj.ds.tbl", "view:proj.ds.v", ...).
        Resource string `json:"resource"`
        // Kind classifies why the failure happened
        // ("unsupported", "rpc", "read", "write", "skipped").
        // NOTE(review): the orchestrator's kind constants cover the
        // first four; confirm what produces "skipped".
        Kind  string `json:"kind"`
        // Error is the human-readable failure message.
        Error string `json:"error"`
}

// Validate performs the cheap input checks the orchestrator relies
// on before it touches the network. Every failure wraps
// ErrInvalidRequest with a human-readable reason so the HTTP
// handler can answer 400 with the right message.
//
// Checks, in order: non-nil receiver; non-blank source.project;
// source.table needs source.dataset; destination.table needs
// destination.dataset; destination.dataset needs source.dataset;
// destination.table needs source.table; maxRowsPerTable >= 0.
func (r *SeedRequest) Validate() error {
        invalid := func(reason string) error {
                return fmt.Errorf("%w: %s", ErrInvalidRequest, reason)
        }
        if r == nil {
                return invalid("nil request body")
        }

        src := r.Source
        switch {
        case strings.TrimSpace(src.Project) == "":
                return invalid("source.project is required")
        case src.Table != "" && src.Dataset == "":
                return invalid("source.table requires source.dataset")
        }

        if dst := r.Destination; dst != nil {
                switch {
                case dst.Table != "" && dst.Dataset == "":
                        return invalid("destination.table requires destination.dataset")
                case src.Dataset == "" && dst.Dataset != "":
                        return invalid("destination.dataset requires source.dataset (cannot remap a project-scope seed to a single dataset)")
                case src.Table == "" && dst.Table != "":
                        return invalid("destination.table requires source.table")
                }
        }

        if r.MaxRowsPerTable < 0 {
                return invalid("maxRowsPerTable must be >= 0")
        }
        return nil
}

// Env var names walked by ResolveBillingProject's fallback chain.
// Exported as package-level constants so tests and callers reference
// the same strings the implementation looks up.
//
// The lookup order is defined by billingEnvChain, not by the
// declaration order of this block.
const (
        EnvGoogleCloudQuotaProject = "GOOGLE_CLOUD_QUOTA_PROJECT"
        EnvGoogleCloudProject      = "GOOGLE_CLOUD_PROJECT"
        EnvGcloudProject           = "GCLOUD_PROJECT"
)

// billingEnvChain is the documented env-var fallback order
// ResolveBillingProject walks; pulled into a package-level var so
// the order stays self-documenting and tests assert against the
// same source of truth.
//
// Order matters: ResolveBillingProject returns the first entry whose
// value is set and non-blank, so earlier entries take precedence.
var billingEnvChain = []string{
        EnvGoogleCloudQuotaProject,
        EnvGoogleCloudProject,
        EnvGcloudProject,
}

// ResolveBillingProject chooses the GCP project that the seeder's
// production reads are billed to. Candidates are tried in this
// order, and the first one that is non-blank after trimming wins:
//
//  1. Request body's `billingProject`.
//  2. Gateway default project (--project-id).
//  3. $GOOGLE_CLOUD_QUOTA_PROJECT.
//  4. $GOOGLE_CLOUD_PROJECT.
//  5. $GCLOUD_PROJECT.
//  6. Source project (the read project itself).
//
// The env lookup is injected through `getenv` so tests stay
// independent of the process environment; production callers pass
// os.LookupEnv. A nil `getenv` skips steps 3-5 entirely, i.e. it
// behaves as though none of the env vars were set. The returned
// value is always whitespace-trimmed and may be empty when every
// candidate, including the source project, is blank.
func ResolveBillingProject(req SeedRequest, gatewayDefault string, getenv func(string) (string, bool)) string {
        // Explicit settings outrank anything found in the environment.
        for _, candidate := range [...]string{req.BillingProject, gatewayDefault} {
                if trimmed := strings.TrimSpace(candidate); trimmed != "" {
                        return trimmed
                }
        }

        if getenv != nil {
                for _, name := range billingEnvChain {
                        raw, found := getenv(name)
                        if !found {
                                continue
                        }
                        if trimmed := strings.TrimSpace(raw); trimmed != "" {
                                return trimmed
                        }
                }
        }

        // Last resort: bill the project being read from.
        return strings.TrimSpace(req.Source.Project)
}

// ErrInvalidRequest is the sentinel SeedRequest.Validate wraps so
// callers can detect "this is a 400, not a 500" without string
// matching. Compare via errors.Is. DecodeRequest wraps the same
// sentinel around JSON decode failures, so malformed bodies and
// unknown fields are detectable the same way.
var ErrInvalidRequest = errors.New("seed: invalid request")

// ErrProductionUnsupported is returned by NewProductionOrchestrator
// when the build does not include a live production client. The
// initial integration intentionally ships without a hard dependency
// on cloud.google.com/go/bigquery so operators who only need the
// YAML seed loader (or the per-PR test runner) don't pay for it; a
// future build tag will swap in the real implementation.
//
// NOTE(review): the gateway's lazyProductionRunner.Run comment
// attributes this error to NewProductionReader rather than
// NewProductionOrchestrator -- confirm which constructor actually
// returns it and align the two comments.
var ErrProductionUnsupported = errors.New("seed: production seeding is not compiled into this build")

// LookupEnvOrEmpty is the production env lookup used by the seed
// handler when it needs to consult the documented env chain (see
// ResolveBillingProject). It is a package-level variable bound to
// os.LookupEnv, so despite the "OrEmpty" suffix it keeps
// os.LookupEnv's (value, ok) signature and can be passed directly
// as ResolveBillingProject's getenv argument. Being a variable
// rather than a direct call, it can be reassigned by tests.
var LookupEnvOrEmpty = os.LookupEnv

// nowRFC3339 yields the current instant, converted to UTC and
// rendered with the time.RFC3339 layout -- the format SeedResult
// timestamps use. It is a package-level function variable so tests
// can replace it to pin the clock.
var nowRFC3339 = func() string {
        utcNow := time.Now().UTC()
        return utcNow.Format(time.RFC3339)
}

// DecodeRequest turns a JSON request body into a SeedRequest. It
// centralizes the decoder configuration so handlers don't need to
// remember to enable DisallowUnknownFields: a misspelled key such as
// "billing_project" is reported as an error instead of being dropped
// silently. Any decode failure is wrapped together with
// ErrInvalidRequest (so errors.Is matches both the sentinel and the
// underlying cause) and the zero SeedRequest is returned alongside.
func DecodeRequest(b []byte) (SeedRequest, error) {
        decoder := json.NewDecoder(strings.NewReader(string(b)))
        decoder.DisallowUnknownFields()

        var parsed SeedRequest
        err := decoder.Decode(&parsed)
        if err == nil {
                return parsed, nil
        }
        return SeedRequest{}, fmt.Errorf("%w: %w", ErrInvalidRequest, err)
}

package gateway

import (
        "context"
        "os"

        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
)

// newSeedRunner builds the seed.Runner that the
// `POST /api/emulator/seed` handler dispatches to when
// EnableSeedAPI is true.
//
// In the default build the runner's underlying ProductionReader is
// the "unsupported" stub (seed.NewProductionReader without a build
// tag), so the seed handler still surfaces a clean 501 instead of a
// panic when the operator forgot to build with
// `-tags=seed_production_live`. The CatalogClient half is always
// real -- it's what the gateway already uses for every REST handler.
//
// The runner holds an applier built from eng.Catalog, so callers
// must keep the engine subprocess alive for as long as they expect
// to service seed requests. Env lookups go through os.LookupEnv.
func newSeedRunner(opts Options, eng *engine.Client) seed.Runner {
        runner := new(lazyProductionRunner)
        runner.defaults = DefaultsFromOptions(opts)
        runner.applier = seed.NewCatalogApplier(eng.Catalog)
        runner.envLookup = os.LookupEnv
        return runner
}

// DefaultsFromOptions extracts the seeding-relevant subset of an
// Options value -- the default project id and default dataset
// location -- into the compact seed.Defaults shape consumed by the
// orchestrator and the YAML loader. It is defined in the gateway
// package (not gateway/seed) so that the seed package never has to
// import gateway, which would create an import cycle through the
// route registration in gateway/server.go.
func DefaultsFromOptions(o Options) seed.Defaults {
        var out seed.Defaults
        out.ProjectID = o.DefaultProjectID
        out.DatasetLocation = o.DefaultDatasetLocation
        return out
}

// lazyProductionRunner defers ProductionReader construction until
// the first request. That keeps gateway startup time cheap when
// nobody actually invokes the seed API and lets us surface ADC /
// quota / billing errors as a per-operation failure rather than a
// hard fail at gateway boot.
//
// Fields: defaults carries the gateway-level project id / dataset
// location fallbacks; applier is the engine-facing write surface
// handed to the orchestrator; envLookup is the env-var lookup passed
// to billing-project resolution, the reader constructor, and the
// orchestrator on every Run.
type lazyProductionRunner struct {
        defaults  seed.Defaults
        applier   seed.Applier
        envLookup func(string) (string, bool)
}

// Run satisfies seed.Runner. Every invocation resolves the billing
// project, constructs a fresh production reader for it (which fails
// with ErrProductionUnsupported in the default build), and hands
// that reader to a new orchestrator; nothing is cached between
// calls, so the cloud client's connection lifecycle stays scoped to
// a single seed request. A reader construction error is returned
// as-is with a nil result.
func (l *lazyProductionRunner) Run(ctx context.Context, req seed.SeedRequest) (*seed.SeedResult, error) {
        lookup := l.envLookup
        billingProject := seed.ResolveBillingProject(req, l.defaults.ProjectID, lookup)

        reader, err := seed.NewProductionReader(ctx, billingProject, lookup)
        if err != nil {
                return nil, err
        }

        orchestrator := seed.NewOrchestrator(reader, l.applier, l.defaults)
        orchestrator.EnvLookup = lookup
        return orchestrator.Run(ctx, req)
}

package seedfile

import (
        "context"
        "fmt"
        "strings"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
)

// applyTimeout bounds the total wall time the loader spends talking
// to the engine when applying one file. Large seeds (thousands of
// rows) easily fit inside this budget on a local engine; a value
// much smaller risks flaky startups on CI when the engine is still
// warming caches.
//
// NOTE(review): ApplyFiles creates a single context with this
// timeout and reuses it for every path it is given, so the budget as
// implemented covers the whole ApplyFiles call rather than each
// individual file -- confirm which is intended.
const applyTimeout = 2 * time.Minute

// ApplyFiles loads and applies each YAML seed file, in the order
// given. It stops at the first file that fails to load, validate, or
// apply, so operators see the original schema error rather than a
// stream of confusing follow-on failures.
//
// `defaults` supplies the gateway-level fallbacks (project id,
// dataset location) for anything a file leaves out. `applier` is the
// engine-facing surface; in production this is
// seed.NewCatalogApplier over the live CatalogClient.
//
// The work runs under a background context bounded by applyTimeout;
// use ApplyFilesContext to supply a different context.
func ApplyFiles(paths []string, applier seed.Applier, defaults seed.Defaults) error {
        bounded, release := context.WithTimeout(context.Background(), applyTimeout)
        defer release()

        return ApplyFilesContext(bounded, paths, applier, defaults)
}

// ApplyFilesContext is ApplyFiles with a caller-supplied context.
// Tests use it to pass a short-deadline context and exercise
// cancellation behavior.
//
// Files are processed strictly in order. A Load failure is returned
// unchanged; an Apply failure is wrapped with the offending path.
// Either one aborts the remaining files.
func ApplyFilesContext(ctx context.Context, paths []string, applier seed.Applier, defaults seed.Defaults) error {
        for idx := range paths {
                path := paths[idx]

                file, loadErr := Load(path)
                if loadErr != nil {
                        return loadErr
                }

                applyErr := Apply(ctx, file, applier, defaults)
                if applyErr != nil {
                        return fmt.Errorf("seedfile %s: %w", path, applyErr)
                }
        }
        return nil
}

// Apply materializes one decoded File against the engine through
// the supplied applier. Processing order is deterministic: datasets
// in file order, tables in declaration order within each dataset,
// and rows in declaration order. Operators rely on this for
// reproducible seeds (e.g. autoincrement-style ids).
//
// For every dataset the project id is resolved as dataset entry >
// file default > gateway default, and an error is returned when all
// three are blank. The location follows the same precedence but may
// legitimately stay empty.
//
// "Already exists" is tolerated at the dataset and table level: the
// applier reports created=false in that case. Rows are inserted only
// into tables that were newly created, so gateway restarts against a
// persistent data_dir do not duplicate seed data.
//
// A nil File is a no-op. The first applier error aborts the run and
// is returned wrapped with the resource it concerned.
func Apply(ctx context.Context, f *File, applier seed.Applier, defaults seed.Defaults) error {
        if f == nil {
                return nil
        }

        for dsIdx := range f.Datasets {
                dataset := f.Datasets[dsIdx]

                projectID := firstNonEmpty(dataset.ProjectID, f.DefaultProjectID, defaults.ProjectID)
                if projectID == "" {
                        return fmt.Errorf(
                                "datasets[%d] (id=%q): no project_id set (file default, dataset entry, and --project-id all empty)",
                                dsIdx,
                                dataset.ID,
                        )
                }

                loc := firstNonEmpty(dataset.Location, f.DefaultLocation, defaults.DatasetLocation)
                if _, err := applier.EnsureDataset(ctx, projectID, dataset.ID, loc); err != nil {
                        return fmt.Errorf("ensure dataset %s.%s: %w", projectID, dataset.ID, err)
                }

                for tblIdx := range dataset.Tables {
                        table := dataset.Tables[tblIdx]
                        protoSchema := fieldsToProto(table.Schema)
                        target := seed.TableRef{
                                ProjectID: projectID,
                                DatasetID: dataset.ID,
                                TableID:   table.ID,
                        }

                        isNew, err := applier.EnsureTable(ctx, target, protoSchema)
                        if err != nil {
                                return fmt.Errorf("ensure table %s.%s.%s: %w",
                                        projectID, dataset.ID, table.ID, err)
                        }

                        // Pre-existing tables keep their data; nothing to do
                        // for tables that declare no rows either.
                        if !isNew || len(table.Rows) == 0 {
                                continue
                        }

                        if _, err := applier.InsertRows(ctx, target, protoSchema, table.Rows); err != nil {
                                return fmt.Errorf("insert rows for %s.%s.%s (file datasets[%d].tables[%d]): %w",
                                        projectID, dataset.ID, table.ID, dsIdx, tblIdx, err)
                        }
                }
        }
        return nil
}

// firstNonEmpty scans its arguments left to right and returns the
// first one that is non-empty once surrounding whitespace is
// trimmed; the trimmed form is what gets returned. It yields "" when
// no argument qualifies (including when called with no arguments).
// Used to walk the (entry > file-default > gateway-default)
// precedence chain for project id and location.
func firstNonEmpty(vs ...string) string {
        for i := 0; i < len(vs); i++ {
                trimmed := strings.TrimSpace(vs[i])
                if trimmed == "" {
                        continue
                }
                return trimmed
        }
        return ""
}

// fieldsToProto converts a slice of YAML FieldSchema entries into
// the engine's proto TableSchema, preserving order. Each entry is
// converted by fieldToProto, which recurses into nested
// STRUCT/RECORD fields. The returned schema always has a non-nil
// Fields slice, even when the input is empty.
func fieldsToProto(fields []FieldSchema) *enginepb.TableSchema {
        converted := make([]*enginepb.FieldSchema, 0, len(fields))
        for i := range fields {
                converted = append(converted, fieldToProto(fields[i]))
        }
        return &enginepb.TableSchema{Fields: converted}
}

// fieldToProto converts a single YAML FieldSchema into its proto
// counterpart. Name, Mode, and Description are copied unchanged. The
// type is copied as written except that "RECORD" (compared
// case-insensitively) is rewritten to "STRUCT". Nested fields are
// converted recursively in declaration order; a field with no
// nested fields leaves the proto's Fields slice nil.
func fieldToProto(f FieldSchema) *enginepb.FieldSchema {
        typeName := f.Type
        if strings.EqualFold(typeName, "RECORD") {
                typeName = "STRUCT"
        }

        var nested []*enginepb.FieldSchema
        for i := range f.Fields {
                nested = append(nested, fieldToProto(f.Fields[i]))
        }

        return &enginepb.FieldSchema{
                Name:        f.Name,
                Type:        typeName,
                Mode:        f.Mode,
                Description: f.Description,
                Fields:      nested,
        }
}

package seedfile

import (
        "path/filepath"
        "regexp"
        "slices"
        "strings"
)

// PublicDataProject is the BigQuery project id that third-party
// samples use for public dataset queries.
const PublicDataProject = "bigquery-public-data"

// PublicDataSeedRelPath is the repo-relative path to the bundled YAML
// fixture. Docker copies it under /opt/bigquery-emulator/.
const PublicDataSeedRelPath = "testdata/public-data/bigquery-public-data.yaml"

// PublicDataSeedContainerPath is where the runtime image installs the
// fixture so gateway_main can pass --seed-data-file without host mounts.
// It is PublicDataSeedRelPath rooted at /opt/bigquery-emulator/.
const PublicDataSeedContainerPath = "/opt/bigquery-emulator/testdata/public-data/bigquery-public-data.yaml"

// SeededPublicTables lists project.dataset.table resources the bundled
// fixture materializes. Skip matrices (python emulator_pytest_skip,
// third_party/README.md) treat only these refs as emulator-backed.
//
// Every entry is fully qualified with PublicDataProject and uses "."
// as the separator, which is the normalized form produced by
// PublicDataRefsInText and expected by IsSeededPublicTable.
var SeededPublicTables = []string{
        PublicDataProject + ".usa_names.usa_1910_2013",
        PublicDataProject + ".usa_names.usa_1910_current",
        PublicDataProject + ".samples.shakespeare",
        PublicDataProject + ".github_repos.commits",
        PublicDataProject + ".stackoverflow.posts_questions",
        PublicDataProject + ".ml_datasets.penguins",
        PublicDataProject + ".utility_us.country_code_iso",
}

// publicTableRefRE matches a literal "bigquery-public-data" followed
// by a dataset and a table identifier, each introduced by either "."
// or ":" and made up of ASCII letters, digits, and underscores.
// Capture group 1 is the dataset and group 2 is the table. The
// pattern is unanchored, so it finds references embedded anywhere in
// a larger text.
var publicTableRefRE = regexp.MustCompile(
        `bigquery-public-data[.:]([a-zA-Z0-9_]+)[.:]([a-zA-Z0-9_]+)`,
)

// PublicDataSeedPathFromRoot returns the location of the bundled
// fixture underneath the given repository root, i.e. repoRoot joined
// with PublicDataSeedRelPath using the OS path separator. The result
// is absolute only when repoRoot itself is absolute; no filesystem
// access or existence check is performed.
func PublicDataSeedPathFromRoot(repoRoot string) string {
        fixturePath := filepath.Join(repoRoot, PublicDataSeedRelPath)
        return fixturePath
}

// PublicDataRefsInText collects every bigquery-public-data table
// reference found in SQL or sample source text and returns them as a
// set of normalized "project.dataset.table" strings. Both "." and
// ":" separators are recognized in the input; the output always
// uses ".". The returned map is never nil and is empty when the
// text contains no references.
func PublicDataRefsInText(text string) map[string]struct{} {
        refs := make(map[string]struct{})
        matches := publicTableRefRE.FindAllStringSubmatch(text, -1)
        for i := range matches {
                groups := matches[i]
                // groups[0] is the whole match; [1] and [2] are dataset and table.
                if len(groups) >= 3 {
                        normalized := strings.Join([]string{PublicDataProject, groups[1], groups[2]}, ".")
                        refs[normalized] = struct{}{}
                }
        }
        return refs
}

// PublicDataRefsFullySeeded reports whether text references at
// least one bigquery-public-data table and every such reference is
// listed in SeededPublicTables. Text with no public-data references
// at all yields false, as does text with any reference that is not
// seeded.
func PublicDataRefsFullySeeded(text string) bool {
        found := PublicDataRefsInText(text)
        if len(found) == 0 {
                return false
        }
        for ref := range found {
                if !slices.Contains(SeededPublicTables, ref) {
                        return false
                }
        }
        return true
}

// IsSeededPublicTable reports whether ref names one of the bundled
// tables in SeededPublicTables. ref is whitespace-trimmed first and
// may be given as project.dataset.table or as dataset.table; when it
// does not already start with "bigquery-public-data." that project
// prefix is prepended before the lookup.
func IsSeededPublicTable(ref string) bool {
        const qualifier = PublicDataProject + "."

        candidate := strings.TrimSpace(ref)
        if !strings.HasPrefix(candidate, qualifier) {
                candidate = qualifier + candidate
        }
        return slices.Index(SeededPublicTables, candidate) >= 0
}

// Package seedfile loads a declarative YAML file at gateway startup
// and applies its datasets / tables / rows to the engine via the
// shared seed.Applier surface (gateway/seed).
//
// The YAML schema is intentionally close to the BigQuery REST API's
// dataset / table / row shape so operators who know one can read the
// other:
//
//        project_id: dev            # default project (optional; can also
//                                   # set per-dataset)
//        location: US               # default location (optional)
//        datasets:
//          - id: ds
//            project_id: dev        # optional override
//            location: US           # optional override
//            tables:
//              - id: people
//                schema:
//                  - {name: id, type: INT64, mode: REQUIRED}
//                  - {name: name, type: STRING}
//                rows:
//                  - {id: 1, name: ada}
//                  - {id: 2, name: bob}
//
// The schema is the runtime seed schema; it is deliberately
// independent from the conformance/runner.Fixture format (which
// carries test-only expectations) so production seeding doesn't pick
// up assertions that have no meaning at runtime.
package seedfile

import (
        "errors"
        "fmt"
        "io"
        "os"
        "strings"

        "gopkg.in/yaml.v3"
)

// File is the top-level YAML schema. Defaults at this level apply
// when a per-dataset field is empty.
type File struct {
        // DefaultProjectID is the project a dataset belongs to when
        // the dataset itself omits project_id. When both are empty
        // the loader falls back to seed.Defaults.ProjectID (the
        // gateway-level --project-id), and finally errors if even
        // that is missing.
        DefaultProjectID string `yaml:"project_id"`

        // DefaultLocation is the BigQuery location stamped on a
        // dataset when the dataset entry itself omits one. Apply
        // resolves the location as dataset entry, then this file
        // default, then seed.Defaults.DatasetLocation, so a value
        // here takes precedence over the gateway-level default.
        // When all three are empty the location stays empty -- the
        // engine will accept the dataset without a location and
        // clients can read it back as such.
        DefaultLocation string `yaml:"location"`

        // Datasets enumerates the resources the loader will materialize.
        Datasets []Dataset `yaml:"datasets"`
}

// Dataset describes one logical BigQuery dataset and the tables
// inside it.
type Dataset struct {
        // ID is the dataset's BigQuery id. Required; File.Validate
        // rejects an empty or whitespace-only value.
        ID string `yaml:"id"`

        // ProjectID overrides the file-level default for this
        // dataset. Optional.
        ProjectID string `yaml:"project_id"`

        // Location overrides the file-level default. Optional.
        Location string `yaml:"location"`

        // Tables is the per-dataset table list. May be empty so
        // operators can pre-create empty datasets (matches BigQuery's
        // "dataset without any tables" state).
        Tables []Table `yaml:"tables"`
}

// Table mirrors the REST API's Table resource. Schemas are
// positional (the column order defines the row layout) and rows
// are key/value maps keyed by column name.
type Table struct {
        // ID is the table's BigQuery id. Required; File.Validate
        // rejects an empty or whitespace-only value.
        ID string `yaml:"id"`

        // Schema enumerates the table's columns. Required for tables
        // that include rows so the loader can lay cells out
        // positionally. Empty schema is allowed for "register the
        // table, no rows" workflows.
        Schema []FieldSchema `yaml:"schema"`

        // Rows is the per-table row list. Each row is a map keyed by
        // column name; missing columns become NULL cells. Extra keys
        // not in the schema are silently dropped, matching the
        // `tabledata.insertAll` handler's behavior. Apply inserts
        // these rows only when the table was newly created by that
        // Apply call.
        Rows []map[string]any `yaml:"rows"`
}

// FieldSchema mirrors enginepb.FieldSchema. We don't reuse the
// proto struct directly because the YAML decoder is happier with
// plain Go tags than with the generated protobuf struct.
type FieldSchema struct {
        // Name is the column name. Required.
        Name string `yaml:"name"`

        // Type is the BigQuery type name (STRING, INT64, BOOL, ...).
        // Required. The value is passed to the engine as written,
        // except that RECORD (matched case-insensitively) is
        // rewritten to STRUCT during conversion to the proto schema.
        Type string `yaml:"type"`

        // Mode is one of NULLABLE | REQUIRED | REPEATED. Empty
        // defaults to NULLABLE on the engine side.
        Mode string `yaml:"mode"`

        // Description is a free-form column description. Optional.
        Description string `yaml:"description"`

        // Fields holds nested STRUCT/RECORD fields. Walked
        // recursively when present.
        Fields []FieldSchema `yaml:"fields"`
}

// Load reads the YAML seed file at path and decodes it into a File
// via Decode, passing the path along as the error-message source.
// Decode is strict about unknown keys, so a typo (e.g. `projects:`
// instead of `datasets:`) surfaces as an error rather than silently
// producing an empty seed. A read failure is wrapped with the path.
//
// `path` is operator-supplied via --seed-data-file; the gosec G304
// warning is expected (the whole point of the helper is to read
// from a caller-named path) and suppressed inline.
func Load(path string) (*File, error) {
        raw, readErr := os.ReadFile(path) //nolint:gosec // path is the operator-supplied --seed-data-file
        if readErr == nil {
                return Decode(raw, path)
        }
        return nil, fmt.Errorf("seedfile: read %s: %w", path, readErr)
}

// Decode parses YAML bytes into a File and validates the result.
// The `source` argument is only used in error messages; pass the
// originating path when available, "" for in-memory inputs.
//
// Decoding is strict: keys that do not map to a known field are
// rejected. Input that contains no YAML document at all (empty or
// effectively empty) is not an error -- it yields an empty File and
// a nil error, so an empty seed file is a valid no-op. Parse and
// validation failures are wrapped with the source label.
func Decode(data []byte, source string) (*File, error) {
        var f File
        dec := yaml.NewDecoder(strings.NewReader(string(data)))
        dec.KnownFields(true)
        if err := dec.Decode(&f); err != nil {
                // The decoder reports "no document in the stream" as
                // io.EOF. Match the sentinel with errors.Is rather than
                // comparing the message text, so an unrelated error that
                // happens to read "EOF" is not mistaken for empty input.
                if errors.Is(err, io.EOF) {
                        return &File{}, nil
                }
                return nil, fmt.Errorf("seedfile: parse %s: %w", labelSource(source), err)
        }
        if err := f.Validate(); err != nil {
                return nil, fmt.Errorf("seedfile: validate %s: %w", labelSource(source), err)
        }
        return &f, nil
}

// ErrEmptyFile is a sentinel for "the seed input is empty", intended
// to be matched with errors.Is.
//
// NOTE(review): Decode does not currently return this error -- for
// empty input it returns an empty File and a nil error. Either wire
// the sentinel into Decode or treat it as reserved; confirm no
// caller depends on receiving it.
var ErrEmptyFile = errors.New("seedfile: empty input")

// labelSource picks the text used to identify an input in error
// messages: the given source when it is non-empty, otherwise the
// fixed placeholder "<input>".
func labelSource(s string) string {
        const placeholder = "<input>"
        if len(s) > 0 {
                return s
        }
        return placeholder
}

// Validate runs cheap structural checks before the loader starts
// talking to the engine. The error wording aims to point at the
// exact field so operator fixes are quick.
//
// It checks, in file order and stopping at the first problem:
//   - every dataset has a non-blank id;
//   - every table has a non-blank id;
//   - a table that declares rows also declares a schema;
//   - every schema field has a non-blank name and type, including
//     fields nested under STRUCT/RECORD columns at any depth.
//
// A nil File has nothing to validate and returns nil.
func (f *File) Validate() error {
        if f == nil {
                return nil
        }
        for i, ds := range f.Datasets {
                if strings.TrimSpace(ds.ID) == "" {
                        return fmt.Errorf("datasets[%d].id is required", i)
                }
                for j, tbl := range ds.Tables {
                        if strings.TrimSpace(tbl.ID) == "" {
                                return fmt.Errorf("datasets[%d].tables[%d].id is required", i, j)
                        }
                        if len(tbl.Rows) > 0 && len(tbl.Schema) == 0 {
                                return fmt.Errorf("datasets[%d].tables[%d].schema is required when rows are present",
                                        i, j)
                        }
                        schemaPath := fmt.Sprintf("datasets[%d].tables[%d].schema", i, j)
                        if err := validateFieldSchemas(schemaPath, tbl.Schema); err != nil {
                                return err
                        }
                }
        }
        return nil
}

// validateFieldSchemas checks that every field in fields, and every
// field nested beneath it, has a non-blank name and type. path is
// the YAML location of the slice being checked (for example
// "datasets[0].tables[1].schema") and is extended with "[k]" and
// ".fields" while descending, so the error names the exact offender.
func validateFieldSchemas(path string, fields []FieldSchema) error {
        for k, field := range fields {
                fieldPath := fmt.Sprintf("%s[%d]", path, k)
                if strings.TrimSpace(field.Name) == "" {
                        return fmt.Errorf("%s.name is required", fieldPath)
                }
                if strings.TrimSpace(field.Type) == "" {
                        return fmt.Errorf("%s.type is required", fieldPath)
                }
                // Nested columns are sent to the engine as well, so they
                // must satisfy the same requirements as top-level ones.
                if err := validateFieldSchemas(fieldPath+".fields", field.Fields); err != nil {
                        return err
                }
        }
        return nil
}

package gateway

import (
        "log/slog"
        "net/http"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers"
        "github.com/vantaboard/bigquery-emulator/gateway/handlers/datatransfer"
        "github.com/vantaboard/bigquery-emulator/gateway/middleware"
        "github.com/vantaboard/bigquery-emulator/gateway/seed"
        "github.com/vantaboard/bigquery-emulator/gateway/sqltools"
)

// NewServer assembles the HTTP handler tree that implements the
// BigQuery REST surface, using Go 1.22+ method-aware mux patterns,
// and returns it wrapped in the gateway middleware.
//
// The routes mirror the public BigQuery v2 REST API. The canonical
// emulator-side mapping (with handler pointers and status) lives in
// docs/REST_API.md; the upstream documentation we cross-check against
// lives under docs/bigquery/docs/reference/rest/v2/.
//
// Every endpoint listed in docs/REST_API.md is registered here, even if
// the handler currently returns http.StatusNotImplemented. That gives
// client libraries a stable surface to probe and lets us flip handlers
// from stub to real one resource at a time, exactly the way the
// gateway-HTTP-surface section of ROADMAP.md prescribes.
//
// Custom-method endpoints (the AIP-136 "{resource}:operation" shape used
// by datasets.undelete and the three tables IAM endpoints) cannot be
// expressed directly in net/http's mux pattern syntax, which requires
// every wildcard segment to end with `}`. For those, we register the
// parent path and dispatch on the trailing `:op` inside the handler.
func NewServer(opts Options, deps handlers.Dependencies, eng *engine.Client) http.Handler {
        router := http.NewServeMux()

        // Health probes: the exact root path and /healthz share a handler.
        for _, pattern := range []string{"GET /{$}", "GET /healthz"} {
                router.HandleFunc(pattern, handlers.Health)
        }
        // Fallback for any request no more specific pattern matches.
        router.HandleFunc("/", handlers.NotFound)
        router.HandleFunc("GET /discovery/v1/apis/bigquery/v2/rest", handlers.Discovery(deps))

        // API groups, each registered by its own mounter.
        mountBigQueryV2(router, deps)
        mountMigration(router, deps)
        mountDataTransfer(router)
        mountSeedAPI(router, opts, eng)
        mountSQLToolsAPI(router, opts, eng)

        return wrapMiddleware(opts, router)
}

// mountBigQueryV2 registers every BigQuery v2 endpoint twice: under
// the `/bigquery/v2/...` prefix (what gcloud, bq, and clients
// pointed at real `*.googleapis.com` use) AND under the bare `/...`
// form. The bare form is required because the official client
// libraries treat BIGQUERY_EMULATOR_HOST as the verbatim baseUrl
// with no version segment — for example @google-cloud/bigquery v8's
// bigquery.js sets `baseUrl = EMULATOR_HOST || ${apiEndpoint}/bigquery/v2`,
// which means a client configured via BIGQUERY_EMULATOR_HOST issues
// `POST /projects/{p}/queries` (no `/bigquery/v2`). Mirroring both
// forms keeps the public REST surface working for both invocation
// styles without a StripPrefix middleware that would have to fork
// on the other top-level prefixes (`/discovery/...`, `/upload/...`,
// `/v2alpha/...`, `/v2/...`, `/v1/...`, `/healthz`).
//
// The dual registration is done by a local helper that is handed to
// each resource-group mounter; the prefixed pattern is registered
// before the bare one.
func mountBigQueryV2(mux *http.ServeMux, deps handlers.Dependencies) {
        dualMount := func(method, path string, h http.HandlerFunc) {
                for _, prefix := range [...]string{"/bigquery/v2", ""} {
                        mux.HandleFunc(method+" "+prefix+path, h)
                }
        }

        mountProjectsAndDatasets(dualMount, deps)
        mountTables(dualMount, deps)
        mountModelsAndRoutines(dualMount, deps)
        mountJobsAndQueries(mux, dualMount, deps)
}

// mountFunc is the per-method mounting helper used by the BigQuery
// v2 sub-mounters. It registers a handler under both `/bigquery/v2`
// and bare-prefix mux patterns (see mountBigQueryV2 doc-comment for
// why the bare form is required).
//
// It is declared as a type alias, so any function value with this
// exact signature can be passed where a mountFunc is expected.
// `path` is the route without the `/bigquery/v2` prefix and `method`
// is the HTTP verb placed in front of the mux pattern.
type mountFunc = func(method, path string, h http.HandlerFunc)

// mountProjectsAndDatasets registers the projects.* and datasets.*
// routes through the supplied mount helper. The routes are listed
// in a table and mounted in that order. datasets.undelete is a
// custom method (`POST /datasets/{datasetId}:undelete`); it is
// served by the POST handler on the dataset path, which dispatches
// on the trailing `:undelete` captured by the wildcard.
func mountProjectsAndDatasets(mount mountFunc, deps handlers.Dependencies) {
        routes := []struct {
                method  string
                path    string
                handler http.HandlerFunc
        }{
                {"GET", "/projects", handlers.ProjectList(deps)},
                {"GET", "/projects/{projectId}/serviceAccount", handlers.ProjectGetServiceAccount(deps)},

                {"GET", "/projects/{projectId}/datasets", handlers.DatasetList(deps)},
                {"POST", "/projects/{projectId}/datasets", handlers.DatasetInsert(deps)},
                {"GET", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetGet(deps)},
                {"PUT", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetUpdate(deps)},
                {"PATCH", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetPatch(deps)},
                {"DELETE", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetDelete(deps)},
                // datasets.undelete, dispatched on the trailing :undelete.
                {"POST", "/projects/{projectId}/datasets/{datasetId}", handlers.DatasetCustomMethodPOST(deps)},
        }
        for _, route := range routes {
                mount(route.method, route.path, route.handler)
        }
}

// mountTables wires tables.*, tabledata.*, and the table-scoped
// rowAccessPolicies routes onto the supplied mount helper. The
// `:getIamPolicy` / `:setIamPolicy` / `:testIamPermissions` custom
// methods ride on the trailing segment of a POST and are dispatched
// in-handler because Go's mux can't match them directly.
func mountTables(mount mountFunc, deps handlers.Dependencies) {
        const (
                tablesPath   = "/projects/{projectId}/datasets/{datasetId}/tables"
                tablePath    = tablesPath + "/{tableId}"
                policiesPath = tablePath + "/rowAccessPolicies"
                policyPath   = policiesPath + "/{policyId}"
        )

        // tables.* collection routes.
        mount(http.MethodGet, tablesPath, handlers.TableList(deps))
        mount(http.MethodPost, tablesPath, handlers.TableInsert(deps))

        // tables.* single-resource routes; POST carries the custom methods.
        mount(http.MethodGet, tablePath, handlers.TableGet(deps))
        mount(http.MethodPut, tablePath, handlers.TableUpdate(deps))
        mount(http.MethodPatch, tablePath, handlers.TablePatch(deps))
        mount(http.MethodDelete, tablePath, handlers.TableDelete(deps))
        mount(http.MethodPost, tablePath, handlers.TableCustomMethodPOST(deps))

        // tabledata.*
        mount(http.MethodGet, tablePath+"/data", handlers.TableDataList(deps))
        mount(http.MethodPost, tablePath+"/insertAll", handlers.TableDataInsertAll(deps))

        // Row-access policies (table-scoped row-level security). Every
        // verb goes through the same dispatcher; a fresh handler is built
        // per registration.
        for _, method := range []string{http.MethodGet, http.MethodPost} {
                mount(method, policiesPath, handlers.RowAccessPolicyDispatch(deps))
        }
        for _, method := range []string{
                http.MethodGet,
                http.MethodPut,
                http.MethodDelete,
                http.MethodPost,
        } {
                mount(method, policyPath, handlers.RowAccessPolicyDispatch(deps))
        }
}

// mountModelsAndRoutines wires the BQML model endpoints and the
// routines (UDF / TVF / stored procedure) endpoints onto the supplied
// mount helper. BQML has no engine backing; routines delegate to the
// engine catalog when wired to emulator_main and mirror metadata in the
// gateway routines store for timestamps and DDL paths.
func mountModelsAndRoutines(mount mountFunc, deps handlers.Dependencies) {
        const (
                datasetPath  = "/projects/{projectId}/datasets/{datasetId}"
                modelsPath   = datasetPath + "/models"
                modelPath    = modelsPath + "/{modelId}"
                routinesPath = datasetPath + "/routines"
                routinePath  = routinesPath + "/{routineId}"
        )

        // models.*
        mount(http.MethodGet, modelsPath, handlers.ModelList(deps))
        mount(http.MethodGet, modelPath, handlers.ModelGet(deps))
        mount(http.MethodPatch, modelPath, handlers.ModelPatch(deps))
        mount(http.MethodDelete, modelPath, handlers.ModelDelete(deps))

        // routines.*
        mount(http.MethodGet, routinesPath, handlers.RoutineList(deps))
        mount(http.MethodPost, routinesPath, handlers.RoutineInsert(deps))
        mount(http.MethodGet, routinePath, handlers.RoutineGet(deps))
        mount(http.MethodPut, routinePath, handlers.RoutineUpdate(deps))
        mount(http.MethodDelete, routinePath, handlers.RoutineDelete(deps))
}

// mountJobsAndQueries wires jobs.* (including the media-upload variant
// of jobs.insert) and the synchronous queries.* endpoints. The trailing
// `/delete` on jobs.delete is not a typo; see
// docs/bigquery/docs/reference/rest/v2/jobs/delete.md.
func mountJobsAndQueries(mux *http.ServeMux, mount mountFunc, deps handlers.Dependencies) {
        const (
                jobsPath    = "/projects/{projectId}/jobs"
                jobPath     = jobsPath + "/{jobId}"
                queriesPath = "/projects/{projectId}/queries"
        )

        mount(http.MethodGet, jobsPath, handlers.JobList(deps))
        mount(http.MethodPost, jobsPath, handlers.JobInsert(deps))

        // jobs.insert media-upload variant. The upload prefix is fixed by
        // the public BigQuery API and the client libraries hardcode it, so
        // these go straight onto the mux and only the
        // `/upload/bigquery/v2/...` form is registered.
        for _, method := range []string{http.MethodPost, http.MethodPut} {
                mux.HandleFunc(method+" /upload/bigquery/v2"+jobsPath, handlers.JobInsertUpload(deps))
        }

        mount(http.MethodGet, jobPath, handlers.JobGet(deps))
        mount(http.MethodPost, jobPath+"/cancel", handlers.JobCancel(deps))
        mount(http.MethodDelete, jobPath+"/delete", handlers.JobDelete(deps))

        mount(http.MethodPost, queriesPath, handlers.QueryRun(deps))
        mount(http.MethodGet, queriesPath+"/{jobId}", handlers.QueryGetResults(deps))
}

// mountMigration wires the BigQuery Migration workflows surface under
// both the v2alpha and v2 version prefixes. The official client
// libraries read BIGQUERY_MIGRATION_EMULATOR_HOST and fall back to
// BIGQUERY_EMULATOR_HOST, so this gateway covers both surfaces from the
// same listener. List returns the empty page so startup probes succeed;
// create/start/get/delete return the documented 404/501. See
// gateway/handlers/migration.go.
func mountMigration(mux *http.ServeMux, deps handlers.Dependencies) {
        versions := [...]string{"v2alpha", "v2"}
        for i := range versions {
                collection := "/" + versions[i] + "/projects/{projectId}/locations/{location}/workflows"
                workflow := collection + "/{workflowId}"

                mux.HandleFunc(http.MethodGet+" "+collection, handlers.MigrationWorkflowList(deps))
                mux.HandleFunc(http.MethodPost+" "+collection, handlers.MigrationWorkflowCreate(deps))
                mux.HandleFunc(http.MethodGet+" "+workflow, handlers.MigrationWorkflowGet(deps))
                mux.HandleFunc(http.MethodDelete+" "+workflow, handlers.MigrationWorkflowDelete(deps))
                // AIP-136 custom methods (only :start today): Go's mux can't
                // match `{workflowId}:start` directly, so POST on the
                // single-workflow path is dispatched in-handler.
                mux.HandleFunc(http.MethodPost+" "+workflow, handlers.MigrationWorkflowCustomMethodPOST(deps))
        }
}

// mountDataTransfer wires the BigQuery Data Transfer Service v1
// surface onto mux by building a datatransfer handler (constructed with
// a nil argument) and letting it register its own routes. Per
// docs/ENGINE_POLICY.md this shallow-emulator port replaces the empty
// shell that lived in gateway/handlers/data_transfer.go: a dataSources
// catalog (`scheduled_query`, `amazon_s3`), in-memory CRUD for
// transferConfigs + transferRuns, and the AIP-136 custom methods
// (`scheduleRuns`, `checkValidCreds`, `startManualRuns`).
func mountDataTransfer(mux *http.ServeMux) {
        transferHandler := datatransfer.NewHandler(nil)
        transferHandler.Register(mux)
}

// mountSeedAPI wires the seed API routes onto mux, and only when the
// operator opted in via --enable-seed-api (opts.EnableSeedAPI). The
// routes refuse non-loopback callers by default; an operator who needs
// CI/CD reach must combine `--seed-api-allow-remote` with
// `--seed-api-seed-token` for the documented defense-in-depth posture.
//
// The Runner stays nil when eng is nil because the default build does
// not link cloud.google.com/go/bigquery; building with
// `-tags=seed_production_live` adds the production runner. In
// Runner=nil mode the POST handler returns 501 with the documented
// "use --seed-data-file" message so operators see a meaningful error
// instead of a hung op.
func mountSeedAPI(mux *http.ServeMux, opts Options, eng *engine.Client) {
        if !opts.EnableSeedAPI {
                return
        }

        // Only build a runner when there is an engine client to back it.
        var seedRunner seed.Runner
        if eng != nil {
                seedRunner = newSeedRunner(opts, eng)
        }

        access := seed.AccessConfig{
                AllowRemote: opts.SeedAPIAllowRemote,
                Token:       opts.SeedAPISeedToken,
        }
        seed.RegisterRoutes(mux, seed.HandlerDeps{
                Access: access,
                Store:  seed.NewStore(),
                Runner: seedRunner,
        })
}

// mountSQLToolsAPI wires the SQL tools routes (/api/emulator/sql/*)
// onto mux when opts.EnableSQLToolsAPI is set; otherwise it does
// nothing. The access gate is built from opts.SQLToolsAPIAllowRemote
// and opts.SQLToolsAPISeedToken, and eng is handed through as the
// engine client.
func mountSQLToolsAPI(mux *http.ServeMux, opts Options, eng *engine.Client) {
        if opts.EnableSQLToolsAPI {
                access := sqltools.AccessConfig{
                        AllowRemote: opts.SQLToolsAPIAllowRemote,
                        Token:       opts.SQLToolsAPISeedToken,
                }
                sqltools.RegisterRoutes(mux, sqltools.HandlerDeps{
                        Access: access,
                        Client: eng,
                })
        }
}

// wrapMiddleware layers the gateway's standing middleware on top of
// the raw mux and returns the handler the gateway listens on. Layers
// are applied innermost-first: gunzip, auth, loopback tag, method
// override, and (only when opts.LogRequests is set) the structured
// access log as the outermost layer.
func wrapMiddleware(opts Options, mux http.Handler) http.Handler {
        // Gunzip is applied first so handlers see the inflated JSON body.
        // The Java BigQuery client sets `Content-Encoding: gzip` on every
        // POST/PUT/PATCH by default; without this layer the gateway's JSON
        // decoders trip on the gzip magic byte (`\x1f`) and emit
        // `invalid character '\x1f' looking for beginning of value`.
        // See gateway/middleware/gunzip.go for the contract.
        stack := middleware.WithGunzipRequestBody(mux)

        // Auth always runs: it parses (but never validates) the
        // Authorization header and attaches a synthetic principal to the
        // request context. Per docs/REST_API.md and the gateway-HTTP-surface
        // section of ROADMAP.md the emulator must never 401, so this layer
        // is permissive by design.
        stack = middleware.WithAuth(stack)

        // Loopback tagging always runs: it records whether the request came
        // from a loopback caller so handlers can gate emulator-internal
        // debug fields on it. The single user today is the synchronous
        // query handler, which surfaces
        // `Job.statistics.query.emulatorRoute` (the C++ coordinator's
        // canonical route disposition string) only to loopback callers per
        // `docs/ENGINE_POLICY.md`.
        stack = middleware.WithLoopbackTag(stack)

        // X-HTTP-Method-Override translation sits OUTSIDE every layer above
        // (auth/loopback/gunzip all ignore the verb; the mux dispatches on
        // it) but INSIDE the access log below, so the log line still shows
        // the `POST` the client put on the wire. The Java
        // google-http-client `MethodOverride` interceptor, enabled in every
        // Google Cloud Java SDK, uses this header to tunnel
        // PATCH/PUT/DELETE through POST (Java `URLConnection` historically
        // rejected `setRequestMethod("PATCH")`), which is why the BigQuery
        // Java sample `AuthorizeDatasetIT` was 405-ing on
        // `POST /projects/{p}/datasets/{d}` until tp05. See
        // `gateway/middleware/method_override.go` for the contract.
        stack = middleware.WithMethodOverride(stack)

        if !opts.LogRequests {
                return stack
        }

        // Request logging was requested; fall back to a discarding logger
        // when the caller did not supply one.
        accessLog := opts.Logger
        if accessLog == nil {
                accessLog = slog.New(slog.DiscardHandler)
        }
        return loggingMiddleware(accessLog, stack)
}

// loggingMiddleware wraps next so that every completed HTTP request is
// emitted as one structured slog event at Info level, carrying the
// method, request URI, response status, and elapsed duration. Passing
// the request line as key/value attributes (instead of
// `log.Printf("%s %s ...", ...)`) keeps the logger's typed-value path
// between gateway and handler, defangs gosec G706's log-injection
// finding (the attacker-controlled URI never lands in a format-string
// position), and lets operators ship the JSON output to structured
// backends.
func loggingMiddleware(logger *slog.Logger, next http.Handler) http.Handler {
        serve := func(w http.ResponseWriter, r *http.Request) {
                began := time.Now()
                // Capture the verb exactly as it arrived on the wire, BEFORE
                // calling downstream. WithMethodOverride (see wrapMiddleware)
                // rewrites `r.Method` in place when a POST tunnels a
                // PATCH/PUT/DELETE via X-HTTP-Method-Override; reading it
                // after next.ServeHTTP would log the rewritten verb and hide
                // the literal POST an operator needs when debugging a 405 /
                // route-mismatch.
                wireMethod := r.Method
                // Status defaults to 200 for handlers that never call
                // WriteHeader explicitly.
                recorder := &statusRecorder{ResponseWriter: w, status: http.StatusOK}
                next.ServeHTTP(recorder, r)
                logger.InfoContext(r.Context(), "request",
                        slog.String("method", wireMethod),
                        slog.String("uri", r.URL.RequestURI()),
                        slog.Int("status", recorder.status),
                        slog.Duration("dur", time.Since(began)),
                )
        }
        return http.HandlerFunc(serve)
}

// statusRecorder is an http.ResponseWriter wrapper that remembers the
// status code of the response so the access log can report it. The
// creator seeds status with the implicit default (http.StatusOK) for
// handlers that never call WriteHeader.
type statusRecorder struct {
        http.ResponseWriter
        // status is the final status code recorded for the response.
        status int
        // wroteHeader is set once the final status has been committed,
        // either by a non-informational WriteHeader or by the first Write.
        wroteHeader bool
}

// WriteHeader records code and forwards the call to the wrapped writer.
// Only the first final status is recorded: net/http ignores later
// WriteHeader calls, so recording them would make the access log
// disagree with what was sent. Informational 1xx codes (other than 101
// Switching Protocols) may precede the real status and are forwarded
// without being recorded.
func (s *statusRecorder) WriteHeader(code int) {
        informational := code >= 100 && code <= 199 && code != http.StatusSwitchingProtocols
        if !s.wroteHeader && !informational {
                s.status = code
                s.wroteHeader = true
        }
        s.ResponseWriter.WriteHeader(code)
}

// Write forwards p to the wrapped writer. A Write before any
// WriteHeader implicitly commits the seeded status, so the recorder
// locks it in and ignores any WriteHeader that arrives afterwards.
func (s *statusRecorder) Write(p []byte) (int, error) {
        s.wroteHeader = true
        return s.ResponseWriter.Write(p)
}

// Package session is the gateway-side, in-memory BigQuery session registry.
// Sessions are minted when a query job requests createSession=true and are
// reattached on follow-up queries that pass connectionProperties session_id.
package session

import (
        "crypto/rand"
        "encoding/hex"
        "sync"

        "github.com/vantaboard/bigquery-emulator/gateway/bqtypes"
)

// Store tracks session ids for the lifetime of the gateway process:
// ids minted by the server for createSession requests, plus ids that
// callers present through the session_id connection property. State is
// volatile; restarts wipe the table.
type Store struct {
        // mu guards byID.
        mu   sync.RWMutex
        // byID maps a session id to the project/location it was first
        // registered with.
        byID map[string]record
}

// record is the per-session metadata kept in Store.byID: the project
// and location supplied when the session id was first registered.
type record struct {
        projectID string
        location  string
}

// NewStore builds a session registry with no sessions in it, ready for
// use.
func NewStore() *Store {
        store := new(Store)
        store.byID = make(map[string]record)
        return store
}

// Resolve produces the SessionInfo for a query/job. When createSession
// is set a fresh session id is minted and registered. Otherwise, when
// connProps carries a non-empty session_id, that id is registered (if
// not already known) and echoed back. It returns nil for non-session
// queries and when the receiver is nil.
func (s *Store) Resolve(
        projectID, location string,
        createSession bool,
        connProps []bqtypes.ConnectionProperty,
) *bqtypes.SessionInfo {
        switch {
        case s == nil:
                return nil
        case createSession:
                return s.mint(projectID, location)
        }

        sessionID := connectionSessionID(connProps)
        if sessionID == "" {
                return nil
        }
        s.register(sessionID, projectID, location)
        return &bqtypes.SessionInfo{SessionID: sessionID}
}

// mint generates a new random session id, registers it under the given
// project and location, and returns it wrapped in a SessionInfo.
func (s *Store) mint(projectID, location string) *bqtypes.SessionInfo {
        info := &bqtypes.SessionInfo{SessionID: newSessionID()}
        s.register(info.SessionID, projectID, location)
        return info
}

// register stores id with its project and location under the write
// lock. An id that is already present keeps its original record; the
// call is then a no-op.
func (s *Store) register(id, projectID, location string) {
        s.mu.Lock()
        defer s.mu.Unlock()

        if _, known := s.byID[id]; known {
                return
        }
        s.byID[id] = record{projectID: projectID, location: location}
}

// connectionSessionID scans props in order and returns the value of the
// first property whose key is "session_id" and whose value is
// non-empty. It returns "" when no such property exists.
func connectionSessionID(props []bqtypes.ConnectionProperty) string {
        for i := range props {
                if props[i].Key != "session_id" {
                        continue
                }
                if value := props[i].Value; value != "" {
                        return value
                }
        }
        return ""
}

// newSessionID returns a session id made of 16 bytes read from
// crypto/rand, rendered as 32 lowercase hex characters.
func newSessionID() string {
        var raw [16]byte
        // The error from rand.Read is deliberately discarded, as before.
        _, _ = rand.Read(raw[:])
        return hex.EncodeToString(raw[:])
}

// Package snapshots retains soft-deleted table data so COPY jobs can
// read snapshot decorators (table@epoch) for undelete samples.
package snapshots

import (
        "context"
        "fmt"
        "slices"
        "strconv"
        "strings"
        "sync"
        "time"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

// defaultPageSize is the MaxResults value listAllRows requests on each
// ListRows call while paging through a table for a snapshot.
const defaultPageSize = 10_000

// Entry is a point-in-time capture of a table's schema and rows, taken
// by CaptureBeforeDelete just before the table is dropped.
type Entry struct {
        // Schema is the table schema returned by DescribeTable at capture time.
        Schema         *enginepb.TableSchema
        // Rows holds every row listed from the table at capture time.
        Rows           []*enginepb.DataRow
        // CreationTimeMs is the table's recorded creation time in Unix
        // milliseconds, or the capture time when none was recorded.
        CreationTimeMs int64
        // DeletionTimeMs is the capture time in Unix milliseconds.
        DeletionTimeMs int64
}

// Store retains deleted-table snapshots and live-table creation times
// for snapshot decorator resolution. Both maps are keyed by the string
// produced by tableKey.
type Store struct {
        // mu guards creationTimes and deleted.
        mu            sync.RWMutex
        // creationTimes holds the creation epoch (Unix ms) of live tables;
        // an entry is removed when the table is captured for deletion.
        creationTimes map[string]int64
        // deleted holds the captured snapshots per table key, appended in
        // capture order.
        deleted       map[string][]Entry
}

// NewStore builds a snapshot store with no recorded creation times and
// no deleted-table snapshots.
func NewStore() *Store {
        store := new(Store)
        store.creationTimes = make(map[string]int64)
        store.deleted = make(map[string][]Entry)
        return store
}

// tableKey builds the map key used by Store for a table, in the form
// "project:dataset.table".
func tableKey(projectID, datasetID, tableID string) string {
        qualifiedTable := datasetID + "." + tableID
        return projectID + ":" + qualifiedTable
}

// RecordCreation stamps the creation time (createdMs) for a live table.
// It is called when a table is first registered so tables.get returns
// a stable epoch: a table that already has a recorded creation time
// keeps it. A nil receiver is a no-op.
func (s *Store) RecordCreation(projectID, datasetID, tableID string, createdMs int64) {
        if s == nil {
                return
        }
        s.mu.Lock()
        defer s.mu.Unlock()

        key := tableKey(projectID, datasetID, tableID)
        if _, recorded := s.creationTimes[key]; recorded {
                return
        }
        s.creationTimes[key] = createdMs
}

// CreationTimeMs looks up the recorded creation epoch for a live table.
// The boolean reports whether a creation time is recorded; it is false
// for unknown tables and for a nil receiver.
func (s *Store) CreationTimeMs(projectID, datasetID, tableID string) (int64, bool) {
        if s == nil {
                return 0, false
        }
        key := tableKey(projectID, datasetID, tableID)

        s.mu.RLock()
        defer s.mu.RUnlock()
        createdMs, found := s.creationTimes[key]
        return createdMs, found
}

// CaptureBeforeDelete snapshots a table's schema and rows so they
// survive the DropTable that follows. It describes the table, pages
// through all of its rows, and appends an Entry to the table's deleted
// history, stamped with the recorded creation time (or the capture
// time when none is recorded) and the capture time as deletion time.
// The live creation-time record for the table is then removed.
//
// It returns nil without doing anything when the receiver or catalog is
// nil, and returns the describe/list error when either RPC fails (in
// which case nothing is stored).
func (s *Store) CaptureBeforeDelete(ctx context.Context, catalog enginepb.CatalogClient,
        projectID, datasetID, tableID string,
) error {
        if s == nil || catalog == nil {
                return nil
        }

        table := &enginepb.TableRef{
                ProjectId: projectID,
                DatasetId: datasetID,
                TableId:   tableID,
        }
        described, err := catalog.DescribeTable(ctx, &enginepb.DescribeTableRequest{Table: table})
        if err != nil {
                return fmt.Errorf("describe table for snapshot: %w", err)
        }
        schema := described.GetSchema()

        rows, err := listAllRows(ctx, catalog, table, schema)
        if err != nil {
                return err
        }

        deletedAt := time.Now().UTC().UnixMilli()
        key := tableKey(projectID, datasetID, tableID)

        s.mu.Lock()
        defer s.mu.Unlock()

        // A missing (or zero) creation time falls back to the capture time.
        createdAt := s.creationTimes[key]
        if createdAt == 0 {
                createdAt = deletedAt
        }
        snapshot := Entry{
                Schema:         schema,
                Rows:           rows,
                CreationTimeMs: createdAt,
                DeletionTimeMs: deletedAt,
        }
        s.deleted[key] = append(s.deleted[key], snapshot)
        // The table is no longer live, so drop its creation-time record.
        delete(s.creationTimes, key)
        return nil
}

// ResolveAtEpoch looks up the snapshot that serves a table@epoch
// decorator. Snapshots for the table are scanned newest-first and the
// first one whose [CreationTimeMs, DeletionTimeMs] interval contains
// epochMs (bounds inclusive) is returned as a pointer to a copy of the
// stored Entry. An error is returned when no snapshot matches or when
// the receiver is nil.
func (s *Store) ResolveAtEpoch(projectID, datasetID, tableID string, epochMs int64,
) (*Entry, error) {
        if s == nil {
                return nil, fmt.Errorf("table %s.%s.%s@%d not found (snapshot store unavailable)",
                        projectID, datasetID, tableID, epochMs)
        }
        key := tableKey(projectID, datasetID, tableID)

        s.mu.RLock()
        defer s.mu.RUnlock()

        for _, candidate := range slices.Backward(s.deleted[key]) {
                if epochMs < candidate.CreationTimeMs || epochMs > candidate.DeletionTimeMs {
                        continue
                }
                // Hand back a copy so the caller does not alias the stored Entry.
                match := candidate
                return &match, nil
        }
        return nil, fmt.Errorf("not found: Table %s:%s.%s@%d", projectID, datasetID, tableID, epochMs)
}

// ParseDecorator splits a "tableId@epoch" string at its last "@" into
// the base table id and an epoch in milliseconds.
//
// Absolute decorators (@123) yield the number as-is. Relative
// decorators (@-3600000, i.e. a suffix starting with "-") yield the
// current UTC time in Unix milliseconds plus the (negative) offset.
//
// decorated is false, and tableID is returned unchanged with epoch 0,
// when there is no "@", when "@" is the first or last character, or
// when the suffix is not a base-10 int64.
func ParseDecorator(tableID string) (base string, epochMs int64, decorated bool) {
        sep := strings.LastIndex(tableID, "@")
        // Reject a missing "@" (-1), an empty base (0), and an empty suffix.
        if sep < 1 || sep+1 >= len(tableID) {
                return tableID, 0, false
        }

        suffix := tableID[sep+1:]
        value, err := strconv.ParseInt(suffix, 10, 64)
        if err != nil {
                return tableID, 0, false
        }
        if strings.HasPrefix(suffix, "-") {
                // Relative decorator: value is an offset from "now".
                value += time.Now().UTC().UnixMilli()
        }
        return tableID[:sep], value, true
}

// listAllRows pages through every row of ref using catalog.ListRows,
// requesting defaultPageSize rows per call, and returns them
// concatenated in listing order. Paging stops when a page comes back
// empty or when the running offset reaches the reported total row
// count. Any ListRows error aborts the listing and is returned wrapped.
// The schema parameter is accepted but not used.
func listAllRows(ctx context.Context, catalog enginepb.CatalogClient,
        ref *enginepb.TableRef, schema *enginepb.TableSchema,
) ([]*enginepb.DataRow, error) {
        _ = schema

        var collected []*enginepb.DataRow
        offset := int64(0)
        for {
                page, err := catalog.ListRows(ctx, &enginepb.ListRowsRequest{
                        Table:      ref,
                        StartIndex: offset,
                        MaxResults: defaultPageSize,
                })
                if err != nil {
                        return nil, fmt.Errorf("list rows for snapshot: %w", err)
                }

                batch := page.GetRows()
                if len(batch) == 0 {
                        return collected, nil
                }
                collected = append(collected, batch...)

                offset += int64(len(batch))
                if offset >= page.GetTotalRows() {
                        return collected, nil
                }
        }
}

package sqltools

import (
        "crypto/subtle"
        "net"
        "net/http"
        "strings"
)

// AccessConfig captures the loopback / token gates for the SQL tools
// routes; CheckAccess enforces them per request.
type AccessConfig struct {
        // AllowRemote, when false, restricts access to requests whose
        // remote address is a loopback address.
        AllowRemote bool
        // Token, when non-empty, must match the value of the HeaderName
        // request header. An empty Token disables the token check.
        Token       string
}

// HeaderName is the canonical token header for remote SQL tools access.
// CheckAccess reads the caller's token from this request header.
const HeaderName = "X-BigQuery-Emulator-SqlTools-Token"

// CheckAccess applies the loopback and token gates to r and returns
// ErrAccessDenied when either one rejects the request, nil otherwise.
//
// The loopback gate applies only when AllowRemote is false and is
// evaluated against r.RemoteAddr. The token gate applies only when
// Token is non-empty and compares it with the HeaderName header.
func (c AccessConfig) CheckAccess(r *http.Request) error {
        if !c.AllowRemote && !isLoopback(r.RemoteAddr) {
                return ErrAccessDenied
        }
        if c.Token == "" {
                // No token configured: nothing further to verify.
                return nil
        }
        if secureEqual(r.Header.Get(HeaderName), c.Token) {
                return nil
        }
        return ErrAccessDenied
}

// ErrAccessDenied is returned by CheckAccess when the loopback or token
// gate rejects a request. It carries HTTP status 403 Forbidden, exposed
// through its Status method.
var ErrAccessDenied = httpError{code: http.StatusForbidden, msg: "sqltools: access denied"}

// httpError is an error value that carries the HTTP status code it
// should be reported with.
type httpError struct {
        code int    // HTTP status code returned by Status
        msg  string // text returned by Error
}

// Error implements the error interface by returning the stored message.
func (e httpError) Error() string {
        return e.msg
}

// Status returns the HTTP status for this error.
func (e httpError) Status() int {
        return e.code
}

// secureEqual reports whether a and b hold the same bytes. The
// comparison is delegated to crypto/subtle so that, for equal-length
// inputs, its running time does not depend on where the first
// differing byte is. Inputs of different lengths compare unequal
// immediately, so the length of the expected value is not hidden.
func secureEqual(a, b string) bool {
        return subtle.ConstantTimeCompare([]byte(a), []byte(b)) == 1
}

// isLoopback reports whether remoteAddr refers to a loopback IP
// address. remoteAddr may be "host:port" or a bare host; when it cannot
// be split into host and port the whole string is treated as the host.
// An empty (or all-whitespace) host counts as loopback, and a host that
// is not a literal IP address (e.g. a hostname) does not.
func isLoopback(remoteAddr string) bool {
        candidate := remoteAddr
        if hostPart, _, splitErr := net.SplitHostPort(remoteAddr); splitErr == nil {
                candidate = hostPart
        }

        candidate = strings.TrimSpace(candidate)
        if candidate == "" {
                return true
        }

        parsed := net.ParseIP(candidate)
        return parsed != nil && parsed.IsLoopback()
}

package sqltools

import (
        "encoding/json"
        "io"
        "net/http"

        "github.com/vantaboard/bigquery-emulator/gateway/engine"
        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
        "google.golang.org/grpc/codes"
        "google.golang.org/grpc/status"
)

const (
        // statusInvalid is the Status string placed in the error
        // envelopes written by the helpers and handlers below.
        statusInvalid  = "invalid"
        // errSQLRequired is the message returned with a 400 when a request
        // that requires SQL arrives with an empty "sql" field.
        errSQLRequired = "sql is required"
)

// HandlerDeps bundles dependencies for SQL tools HTTP handlers.
type HandlerDeps struct {
        // Access is the loopback / token gate checked at the start of
        // each handler.
        Access AccessConfig
        // Client is the engine client whose SQLTools service the handlers
        // call. Handlers answer 503 when it, or its SQLTools field, is nil.
        Client *engine.Client
}

// RegisterRoutes installs the SQL tools HTTP handlers on mux under
// /api/emulator/sql/*: GET capabilities, plus POST format, parse,
// tokenize, complete, and analyze.
func RegisterRoutes(mux *http.ServeMux, deps HandlerDeps) {
        const prefix = "/api/emulator/sql/"

        mux.HandleFunc(http.MethodGet+" "+prefix+"capabilities", deps.handleCapabilities)

        postRoutes := []struct {
                name   string
                handle func(http.ResponseWriter, *http.Request)
        }{
                {name: "format", handle: deps.handleFormat},
                {name: "parse", handle: deps.handleParse},
                {name: "tokenize", handle: deps.handleTokenize},
                {name: "complete", handle: deps.handleComplete},
                {name: "analyze", handle: deps.handleAnalyze},
        }
        for _, route := range postRoutes {
                mux.HandleFunc(http.MethodPost+" "+prefix+route.name, route.handle)
        }
}

// errEnvelope is the JSON body written for every error response in
// this file.
type errEnvelope struct {
        // Code mirrors the HTTP status code of the response.
        Code    int    `json:"code"`
        // Status is a short status string (statusInvalid in this file).
        Status  string `json:"status"`
        // Message is the human-readable error description.
        Message string `json:"message"`
}

// writeJSON sends v as a JSON response with the given status code. The
// Content-Type header is set to application/json before the status is
// written; an encoding error is discarded.
func writeJSON(w http.ResponseWriter, code int, v any) {
        header := w.Header()
        header.Set("Content-Type", "application/json")
        w.WriteHeader(code)

        encoder := json.NewEncoder(w)
        _ = encoder.Encode(v)
}

// writeAccessError writes err as a JSON error envelope. The HTTP status
// comes from err's Status() method when it has one and falls back to
// 403 Forbidden otherwise; the envelope message is err.Error().
func writeAccessError(w http.ResponseWriter, err error) {
        httpCode := http.StatusForbidden
        if coded, hasStatus := err.(interface{ Status() int }); hasStatus {
                httpCode = coded.Status()
        }
        writeJSON(w, httpCode, errEnvelope{
                Code:    httpCode,
                Status:  statusInvalid,
                Message: err.Error(),
        })
}

// writeGrpcError translates an error from an engine gRPC call into a
// JSON error envelope. Errors that carry a gRPC status map as:
// InvalidArgument -> 400, NotFound -> 404, FailedPrecondition -> 412,
// Unimplemented -> 501, anything else -> 500, with the status message
// as the envelope message. Errors without a gRPC status are reported
// as 500 with err.Error() as the message.
func writeGrpcError(w http.ResponseWriter, err error) {
        httpCode := http.StatusInternalServerError
        var message string

        if grpcStatus, isStatus := status.FromError(err); !isStatus {
                message = err.Error()
        } else {
                message = grpcStatus.Message()
                switch grpcStatus.Code() {
                case codes.InvalidArgument:
                        httpCode = http.StatusBadRequest
                case codes.NotFound:
                        httpCode = http.StatusNotFound
                case codes.FailedPrecondition:
                        httpCode = http.StatusPreconditionFailed
                case codes.Unimplemented:
                        httpCode = http.StatusNotImplemented
                }
        }

        writeJSON(w, httpCode, errEnvelope{
                Code:    httpCode,
                Status:  statusInvalid,
                Message: message,
        })
}

// requireClient reports whether the engine client and its SQLTools
// service are both configured. When either is missing it writes a 503
// error envelope to w and returns false, so callers can simply return.
func (d HandlerDeps) requireClient(w http.ResponseWriter) bool {
        if d.Client != nil && d.Client.SQLTools != nil {
                return true
        }
        writeJSON(w, http.StatusServiceUnavailable, errEnvelope{
                Code:    http.StatusServiceUnavailable,
                Status:  statusInvalid,
                Message: "sql tools engine client is not configured",
        })
        return false
}

// readBody reads the whole request body. On success it returns the
// bytes and true. On a read error it writes a 400 error envelope to w
// and returns (nil, false), so callers can simply return.
func (d HandlerDeps) readBody(w http.ResponseWriter, r *http.Request) ([]byte, bool) {
        payload, readErr := io.ReadAll(r.Body)
        if readErr == nil {
                return payload, true
        }
        writeJSON(w, http.StatusBadRequest, errEnvelope{
                Code:    http.StatusBadRequest,
                Status:  statusInvalid,
                Message: "Could not read request body: " + readErr.Error(),
        })
        return nil, false
}

// formatRequest is the JSON request body of the format endpoint.
type formatRequest struct {
        // offsetRequest is embedded; handleFormat reads its OffsetUnit
        // when converting diagnostics.
        offsetRequest
        // SQL is the text to format; handleFormat rejects an empty value.
        SQL               string `json:"sql"`
        // Strict, LineLengthLimit and IndentationSpaces are passed through
        // unchanged to the engine's FormatSqlRequest.
        Strict            bool   `json:"strict"`
        LineLengthLimit   int32  `json:"lineLengthLimit"`
        IndentationSpaces int32  `json:"indentationSpaces"`
}

// formatResponse is the JSON response body of the format endpoint.
type formatResponse struct {
        // FormattedSQL is the formatted SQL returned by the engine.
        FormattedSQL string           `json:"formattedSql"`
        // Diagnostics holds the engine's diagnostics converted to wire
        // form; the field is omitted from the JSON when empty.
        Diagnostics  []diagnosticWire `json:"diagnostics,omitempty"`
}

// handleFormat serves the SQL format endpoint. It enforces the access
// gate, requires a configured engine client, decodes the JSON body
// into a formatRequest, rejects an empty "sql" with 400, forwards the
// request to the engine's Format RPC, and writes the formatted SQL
// plus any diagnostics as JSON. Every failure is written as a JSON
// error envelope.
func (d HandlerDeps) handleFormat(w http.ResponseWriter, r *http.Request) {
        if denied := d.Access.CheckAccess(r); denied != nil {
                writeAccessError(w, denied)
                return
        }
        if !d.requireClient(w) {
                return
        }
        payload, read := d.readBody(w, r)
        if !read {
                return
        }

        var in formatRequest
        if decodeErr := json.Unmarshal(payload, &in); decodeErr != nil {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code:    http.StatusBadRequest,
                        Status:  statusInvalid,
                        Message: "invalid JSON: " + decodeErr.Error(),
                })
                return
        }
        if in.SQL == "" {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code:    http.StatusBadRequest,
                        Status:  statusInvalid,
                        Message: errSQLRequired,
                })
                return
        }

        rpcReq := &enginepb.FormatSqlRequest{
                Sql:               in.SQL,
                Strict:            in.Strict,
                LineLengthLimit:   in.LineLengthLimit,
                IndentationSpaces: in.IndentationSpaces,
        }
        formatted, rpcErr := d.Client.SQLTools.Format(r.Context(), rpcReq)
        if rpcErr != nil {
                writeGrpcError(w, rpcErr)
                return
        }

        reply := formatResponse{FormattedSQL: formatted.GetFormattedSql()}
        engineDiags := formatted.GetDiagnostics()
        for i := range engineDiags {
                reply.Diagnostics = append(reply.Diagnostics,
                        diagnosticFromProto(in.SQL, in.OffsetUnit, engineDiags[i]))
        }
        writeJSON(w, http.StatusOK, reply)
}

// parseRequest is the JSON request body of the parse endpoint.
type parseRequest struct {
        // offsetRequest is embedded; handleParse reads its OffsetUnit when
        // converting diagnostics.
        offsetRequest
        // SQL is the text to parse; handleParse rejects an empty value.
        SQL string `json:"sql"`
}

// parseResponse is the JSON response body of the parse endpoint.
type parseResponse struct {
        // StatementKinds is copied from the engine's parse response.
        StatementKinds []string         `json:"statementKinds"`
        // Diagnostics holds the engine's diagnostics converted to wire
        // form; the field is omitted from the JSON when empty.
        Diagnostics    []diagnosticWire `json:"diagnostics,omitempty"`
}

// handleParse serves the SQL parse endpoint. It enforces the access
// gate, requires a configured engine client, decodes the JSON body
// into a parseRequest, rejects an empty "sql" with 400, forwards the
// SQL to the engine's Parse RPC, and writes the statement kinds plus
// any diagnostics as JSON. Every failure is written as a JSON error
// envelope.
func (d HandlerDeps) handleParse(w http.ResponseWriter, r *http.Request) {
        if denied := d.Access.CheckAccess(r); denied != nil {
                writeAccessError(w, denied)
                return
        }
        if !d.requireClient(w) {
                return
        }
        payload, read := d.readBody(w, r)
        if !read {
                return
        }

        var in parseRequest
        if decodeErr := json.Unmarshal(payload, &in); decodeErr != nil {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code:    http.StatusBadRequest,
                        Status:  statusInvalid,
                        Message: "invalid JSON: " + decodeErr.Error(),
                })
                return
        }
        if in.SQL == "" {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code:    http.StatusBadRequest,
                        Status:  statusInvalid,
                        Message: errSQLRequired,
                })
                return
        }

        parsed, rpcErr := d.Client.SQLTools.Parse(r.Context(), &enginepb.ParseSqlRequest{Sql: in.SQL})
        if rpcErr != nil {
                writeGrpcError(w, rpcErr)
                return
        }

        reply := parseResponse{StatementKinds: parsed.GetStatementKinds()}
        engineDiags := parsed.GetDiagnostics()
        for i := range engineDiags {
                reply.Diagnostics = append(reply.Diagnostics,
                        diagnosticFromProto(in.SQL, in.OffsetUnit, engineDiags[i]))
        }
        writeJSON(w, http.StatusOK, reply)
}

// tokenizeRequest is the JSON request body of the tokenize endpoint.
type tokenizeRequest struct {
        // offsetRequest is embedded; handleTokenize reads its OffsetUnit
        // when converting tokens and diagnostics.
        offsetRequest
        // SQL is the text to tokenize. Unlike format and parse,
        // handleTokenize does not reject an empty value.
        SQL             string `json:"sql"`
        // IncludeComments is passed through to the engine's
        // TokenizeSqlRequest.
        IncludeComments bool   `json:"includeComments"`
}

// tokenizeResponse is the JSON response body of the tokenize endpoint.
type tokenizeResponse struct {
        // Tokens holds the engine's tokens converted to wire form. There is
        // no omitempty, so the key is always present in the JSON.
        Tokens      []tokenWire      `json:"tokens"`
        // Diagnostics holds the engine's diagnostics converted to wire
        // form; the field is omitted from the JSON when empty.
        Diagnostics []diagnosticWire `json:"diagnostics,omitempty"`
}

// handleTokenize serves the SQL-tools "tokenize" endpoint.
//
// Flow: access check -> engine client availability -> read and decode
// the JSON body -> call the engine's Tokenize RPC -> translate tokens
// and diagnostics into tokenizeResponse. Unlike parse/analyze, an empty
// `sql` is not rejected here; it is forwarded to the engine as-is.
//
// Failure responses:
//   - access check failure is reported through writeAccessError;
//   - malformed JSON yields HTTP 400 with statusInvalid;
//   - an RPC error is reported through writeGrpcError.
//
// On success `tokens` is always a JSON array (never null): the slice is
// allocated up front because encoding/json encodes a nil slice as null.
func (d HandlerDeps) handleTokenize(w http.ResponseWriter, r *http.Request) {
        if err := d.Access.CheckAccess(r); err != nil {
                writeAccessError(w, err)
                return
        }
        if !d.requireClient(w) {
                return
        }
        body, ok := d.readBody(w, r)
        if !ok {
                return
        }
        var req tokenizeRequest
        if err := json.Unmarshal(body, &req); err != nil {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code: http.StatusBadRequest, Status: statusInvalid,
                        Message: "invalid JSON: " + err.Error(),
                })
                return
        }
        resp, err := d.Client.SQLTools.Tokenize(r.Context(), &enginepb.TokenizeSqlRequest{
                Sql: req.SQL, IncludeComments: req.IncludeComments,
        })
        if err != nil {
                writeGrpcError(w, err)
                return
        }
        toks := resp.GetTokens()
        // Non-nil even when empty so the field marshals as [] rather than null.
        out := tokenizeResponse{Tokens: make([]tokenWire, 0, len(toks))}
        for _, tok := range toks {
                out.Tokens = append(out.Tokens, tokenFromProto(req.SQL, req.OffsetUnit, tok))
        }
        if diags := resp.GetDiagnostics(); len(diags) > 0 {
                // Diagnostics is omitempty, so it stays nil when there are none.
                out.Diagnostics = make([]diagnosticWire, 0, len(diags))
                for _, diag := range diags {
                        out.Diagnostics = append(out.Diagnostics,
                                diagnosticFromProto(req.SQL, req.OffsetUnit, diag))
                }
        }
        writeJSON(w, http.StatusOK, out)
}

// completeRequest is the JSON body decoded by handleComplete.
type completeRequest struct {
        // offsetRequest contributes the optional `offsetUnit` field.
        offsetRequest
        // ProjectID is required; handleComplete rejects an empty value.
        ProjectID        string `json:"projectId"`
        // DefaultDatasetID is forwarded unchanged to the engine request.
        DefaultDatasetID string `json:"defaultDatasetId"`
        // SQL is the text to complete against.
        SQL              string `json:"sql"`
        // CursorByteOffset is the cursor position. Despite the name it is
        // interpreted in the requested offset unit: handleComplete passes
        // it through convertCursorToUTF8 before calling the engine.
        CursorByteOffset int32  `json:"cursorByteOffset"`
}

// candidateWire is the JSON form of one completion candidate.
// handleComplete copies each field from the engine candidate's getter
// of the same name.
type candidateWire struct {
        Label      string `json:"label"`
        Kind       string `json:"kind"`
        InsertText string `json:"insertText"`
        // Detail and Fqn are dropped from the JSON when empty.
        Detail     string `json:"detail,omitempty"`
        Fqn        string `json:"fqn,omitempty"`
}

// completeResponse is the JSON body handleComplete writes on success.
type completeResponse struct {
        // Candidates lists the engine's completion candidates in order.
        Candidates       []candidateWire `json:"candidates"`
        // ReplacementStart and ReplacementEnd come from the engine reply,
        // passed through convertReplacementFromUTF8 so they are expressed
        // in the offset unit the request asked for.
        ReplacementStart int32           `json:"replacementStart"`
        ReplacementEnd   int32           `json:"replacementEnd"`
}

// handleComplete serves the SQL-tools "complete" endpoint.
//
// Flow: access check -> engine client availability -> read and decode
// the JSON body -> require `projectId` -> convert the cursor into a
// UTF-8 byte offset -> call the engine's Complete RPC -> convert the
// replacement range back into the requested offset unit and copy the
// candidates into completeResponse.
//
// Failure responses:
//   - access check failure is reported through writeAccessError;
//   - malformed JSON or an empty `projectId` yields HTTP 400 with
//     statusInvalid;
//   - an RPC error is reported through writeGrpcError.
//
// On success `candidates` is always a JSON array (never null): the
// slice is allocated up front because encoding/json encodes a nil
// slice as null.
func (d HandlerDeps) handleComplete(w http.ResponseWriter, r *http.Request) {
        if err := d.Access.CheckAccess(r); err != nil {
                writeAccessError(w, err)
                return
        }
        if !d.requireClient(w) {
                return
        }
        body, ok := d.readBody(w, r)
        if !ok {
                return
        }
        var req completeRequest
        if err := json.Unmarshal(body, &req); err != nil {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code: http.StatusBadRequest, Status: statusInvalid,
                        Message: "invalid JSON: " + err.Error(),
                })
                return
        }
        if req.ProjectID == "" {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code: http.StatusBadRequest, Status: statusInvalid,
                        Message: "projectId is required",
                })
                return
        }
        // The engine request carries a byte offset; translate the
        // client's cursor when it was expressed in UTF-16 code units.
        cursor := convertCursorToUTF8(req.SQL, req.OffsetUnit, req.CursorByteOffset)
        resp, err := d.Client.SQLTools.Complete(r.Context(), &enginepb.CompleteSqlRequest{
                ProjectId:        req.ProjectID,
                DefaultDatasetId: req.DefaultDatasetID,
                Sql:              req.SQL,
                CursorByteOffset: cursor,
        })
        if err != nil {
                writeGrpcError(w, err)
                return
        }
        replStart, replEnd := convertReplacementFromUTF8(
                req.SQL, req.OffsetUnit, resp.GetReplacementStart(), resp.GetReplacementEnd())
        cands := resp.GetCandidates()
        out := completeResponse{
                // Non-nil even when empty so the field marshals as [] rather than null.
                Candidates:       make([]candidateWire, 0, len(cands)),
                ReplacementStart: replStart,
                ReplacementEnd:   replEnd,
        }
        for _, c := range cands {
                out.Candidates = append(out.Candidates, candidateWire{
                        Label: c.GetLabel(), Kind: c.GetKind(), InsertText: c.GetInsertText(),
                        Detail: c.GetDetail(), Fqn: c.GetFqn(),
                })
        }
        writeJSON(w, http.StatusOK, out)
}

// capabilitiesResponse is the JSON body handleCapabilities writes.
type capabilitiesResponse struct {
        // SQLTools is set to true by handleCapabilities.
        SQLTools    bool     `json:"sqlTools"`
        // Version carries sqlToolsVersion.
        Version     string   `json:"version"`
        // Endpoints lists the endpoint names advertised to clients.
        Endpoints   []string `json:"endpoints"`
        // OffsetUnits lists the accepted `offsetUnit` values.
        OffsetUnits []string `json:"offsetUnits"`
}

// handleCapabilities serves the SQL-tools "capabilities" endpoint. After
// the access check it replies with a fixed description of the surface:
// the tools version, the endpoint names, and the supported offset units.
// It does not require an engine client.
func (d HandlerDeps) handleCapabilities(w http.ResponseWriter, r *http.Request) {
        accessErr := d.Access.CheckAccess(r)
        if accessErr != nil {
                writeAccessError(w, accessErr)
                return
        }
        endpoints := []string{
                "format", "parse", "tokenize", "complete", "analyze", "capabilities",
        }
        units := []string{offsetUnitUTF8, offsetUnitUTF16}
        writeJSON(w, http.StatusOK, capabilitiesResponse{
                SQLTools:    true,
                Version:     sqlToolsVersion,
                Endpoints:   endpoints,
                OffsetUnits: units,
        })
}

// analyzeRequest is the JSON body decoded by handleAnalyze.
type analyzeRequest struct {
        // offsetRequest contributes the optional `offsetUnit` field.
        offsetRequest
        // ProjectID is required; handleAnalyze rejects an empty value.
        ProjectID        string `json:"projectId"`
        // DefaultDatasetID is forwarded unchanged to the engine request.
        DefaultDatasetID string `json:"defaultDatasetId"`
        // SQL is required; handleAnalyze rejects an empty value.
        SQL              string `json:"sql"`
}

// referencedTableWire is the JSON form of one table reference reported
// by the engine's analyze reply. handleAnalyze copies each field from
// the engine message's getter of the same name.
type referencedTableWire struct {
        ProjectID string `json:"projectId"`
        DatasetID string `json:"datasetId"`
        TableID   string `json:"tableId"`
        // Alias is dropped from the JSON when empty.
        Alias     string `json:"alias,omitempty"`
        Kind      string `json:"kind"`
}

// analyzeResponse is the JSON body handleAnalyze writes on success.
type analyzeResponse struct {
        // ReferencedTables lists the tables the engine reported, in order.
        ReferencedTables []referencedTableWire `json:"referencedTables"`
        // StatementKinds is taken from the engine's analyze reply.
        StatementKinds   []string              `json:"statementKinds"`
        // Diagnostics holds the converted engine diagnostics; the key is
        // omitted from the JSON when the slice is empty.
        Diagnostics      []diagnosticWire      `json:"diagnostics,omitempty"`
}

// handleAnalyze serves the SQL-tools "analyze" endpoint.
//
// Flow: access check -> engine client availability -> read and decode
// the JSON body -> require `projectId` and `sql` -> call the engine's
// Analyze RPC -> translate statement kinds, diagnostics and referenced
// tables into analyzeResponse.
//
// Failure responses:
//   - access check failure is reported through writeAccessError;
//   - malformed JSON, an empty `projectId`, or an empty `sql` yields
//     HTTP 400 with statusInvalid;
//   - an RPC error is reported through writeGrpcError.
//
// On success `referencedTables` and `statementKinds` are always JSON
// arrays (never null): encoding/json encodes a nil slice as null, so
// both are made non-nil before the response is written.
func (d HandlerDeps) handleAnalyze(w http.ResponseWriter, r *http.Request) {
        if err := d.Access.CheckAccess(r); err != nil {
                writeAccessError(w, err)
                return
        }
        if !d.requireClient(w) {
                return
        }
        body, ok := d.readBody(w, r)
        if !ok {
                return
        }
        var req analyzeRequest
        if err := json.Unmarshal(body, &req); err != nil {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code: http.StatusBadRequest, Status: statusInvalid,
                        Message: "invalid JSON: " + err.Error(),
                })
                return
        }
        if req.ProjectID == "" {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code: http.StatusBadRequest, Status: statusInvalid,
                        Message: "projectId is required",
                })
                return
        }
        if req.SQL == "" {
                writeJSON(w, http.StatusBadRequest, errEnvelope{
                        Code: http.StatusBadRequest, Status: statusInvalid,
                        Message: errSQLRequired,
                })
                return
        }
        resp, err := d.Client.SQLTools.Analyze(r.Context(), &enginepb.AnalyzeSqlRequest{
                ProjectId:        req.ProjectID,
                DefaultDatasetId: req.DefaultDatasetID,
                Sql:              req.SQL,
        })
        if err != nil {
                writeGrpcError(w, err)
                return
        }
        kinds := resp.GetStatementKinds()
        if kinds == nil {
                // Keep the non-omitempty array field an array on the wire.
                kinds = []string{}
        }
        tables := resp.GetReferencedTables()
        out := analyzeResponse{
                // Non-nil even when empty so the field marshals as [] rather than null.
                ReferencedTables: make([]referencedTableWire, 0, len(tables)),
                StatementKinds:   kinds,
        }
        if diags := resp.GetDiagnostics(); len(diags) > 0 {
                // Diagnostics is omitempty, so it stays nil when there are none.
                out.Diagnostics = make([]diagnosticWire, 0, len(diags))
                for _, diag := range diags {
                        out.Diagnostics = append(out.Diagnostics,
                                diagnosticFromProto(req.SQL, req.OffsetUnit, diag))
                }
        }
        for _, table := range tables {
                out.ReferencedTables = append(out.ReferencedTables, referencedTableWire{
                        ProjectID: table.GetProjectId(),
                        DatasetID: table.GetDatasetId(),
                        TableID:   table.GetTableId(),
                        Alias:     table.GetAlias(),
                        Kind:      table.GetKind(),
                })
        }
        writeJSON(w, http.StatusOK, out)
}

package sqltools

import (
        "math"
        "strings"
        "unicode/utf16"

        "github.com/vantaboard/bigquery-emulator/gateway/enginepb"
)

const (
        // offsetUnitUTF8 selects UTF-8 byte offsets. normalizeOffsetUnit
        // falls back to it for any value that is not "utf16".
        offsetUnitUTF8  = "utf8"
        // offsetUnitUTF16 selects UTF-16 code-unit offsets.
        offsetUnitUTF16 = "utf16"
        // sqlToolsVersion is the version string reported by the
        // capabilities endpoint.
        sqlToolsVersion = "1.0"
)

// normalizeOffsetUnit canonicalizes a client-supplied offset unit. The
// input is trimmed and lower-cased; "utf16" maps to offsetUnitUTF16 and
// every other value (including the empty string) maps to offsetUnitUTF8.
func normalizeOffsetUnit(unit string) string {
        cleaned := strings.ToLower(strings.TrimSpace(unit))
        if cleaned == offsetUnitUTF16 {
                return offsetUnitUTF16
        }
        return offsetUnitUTF8
}

// utf8ByteOffsetToCodeUnit converts a UTF-8 byte offset into sql to the
// number of UTF-16 code units that precede it. Offsets at or below zero
// yield 0 and offsets past the end are clamped to len(sql).
func utf8ByteOffsetToCodeUnit(sql string, byteOffset int) int {
        switch {
        case byteOffset <= 0:
                return 0
        case byteOffset > len(sql):
                byteOffset = len(sql)
        }
        // Encode just the prefix and count the resulting code units.
        prefix := []rune(sql[:byteOffset])
        return len(utf16.Encode(prefix))
}

// codeUnitToUtf8ByteOffset converts a UTF-16 code-unit offset into sql
// to the corresponding UTF-8 byte offset.
//
// Parameters:
//   - sql: the source text, interpreted as UTF-8.
//   - codeUnit: number of UTF-16 code units preceding the position.
//
// Returns the byte index in sql of that position:
//   - codeUnit <= 0 yields 0;
//   - codeUnit past the end of the text yields len(sql);
//   - a position that falls between the two halves of a surrogate pair
//     snaps back to the start of that character, so the result never
//     points into the middle of a multi-byte UTF-8 sequence.
//
// The walk is done directly over sql rather than by encoding to UTF-16
// and decoding back: the round trip measures the re-encoded text (a
// split surrogate or an invalid byte becomes a 3-byte U+FFFD), which
// can disagree with the real byte positions in sql. It also avoids
// allocating intermediate buffers.
func codeUnitToUtf8ByteOffset(sql string, codeUnit int) int {
        if codeUnit <= 0 {
                return 0
        }
        units := 0
        for i, r := range sql {
                // Runes outside the BMP take a surrogate pair (two code
                // units); everything else, including the U+FFFD produced for
                // an invalid byte, takes one.
                width := 1
                if r >= 0x10000 {
                        width = 2
                }
                if units+width > codeUnit {
                        return i
                }
                units += width
        }
        return len(sql)
}

// int32FromInt narrows v to int32, saturating at math.MaxInt32 and
// math.MinInt32 instead of wrapping when v is out of range.
func int32FromInt(v int) int32 {
        switch {
        case v > int(math.MaxInt32):
                return math.MaxInt32
        case v < int(math.MinInt32):
                return math.MinInt32
        default:
                return int32(v)
        }
}

// int32Ptr returns a pointer to a copy of v, or nil when v is negative.
// Callers use the nil result to leave an optional JSON field out.
func int32Ptr(v int32) *int32 {
        if v >= 0 {
                boxed := v
                return &boxed
        }
        return nil
}

// convertCursorToUTF8 turns a client cursor into a UTF-8 byte offset.
// When unit normalizes to UTF-16 the cursor is treated as a code-unit
// count and converted against sql; otherwise it is returned unchanged.
func convertCursorToUTF8(sql string, unit string, cursor int32) int32 {
        if normalizeOffsetUnit(unit) == offsetUnitUTF16 {
                byteOffset := codeUnitToUtf8ByteOffset(sql, int(cursor))
                return int32FromInt(byteOffset)
        }
        return cursor
}

// convertReplacementFromUTF8 expresses a UTF-8 byte range in the
// client's offset unit. When unit normalizes to UTF-16 both bounds are
// converted to code-unit offsets against sql; otherwise they are
// returned unchanged.
func convertReplacementFromUTF8(sql string, unit string, start, end int32) (int32, int32) {
        if normalizeOffsetUnit(unit) == offsetUnitUTF16 {
                startUnits := utf8ByteOffsetToCodeUnit(sql, int(start))
                endUnits := utf8ByteOffsetToCodeUnit(sql, int(end))
                return int32FromInt(startUnits), int32FromInt(endUnits)
        }
        return start, end
}

// diagnosticWire is the JSON form of one engine diagnostic, as built by
// diagnosticFromProto.
type diagnosticWire struct {
        Line       int32  `json:"line"`
        Column     int32  `json:"column"`
        Message    string `json:"message"`
        Severity   string `json:"severity"`
        // EndLine and EndColumn are dropped from the JSON when zero.
        EndLine    int32  `json:"endLine,omitempty"`
        EndColumn  int32  `json:"endColumn,omitempty"`
        // StartByte and EndByte are nil (and omitted) when the engine
        // reports a negative value.
        StartByte  *int32 `json:"startByte,omitempty"`
        EndByte    *int32 `json:"endByte,omitempty"`
        // StartUTF16 and EndUTF16 are only populated when the request asked
        // for UTF-16 offsets, the SQL text is non-empty, and the matching
        // byte offset is present.
        StartUTF16 *int32 `json:"startUtf16,omitempty"`
        EndUTF16   *int32 `json:"endUtf16,omitempty"`
}

// diagnosticFromProto converts an engine diagnostic into its wire form.
// Position, message and severity are copied over; the byte offsets
// become nil when negative. When unit normalizes to UTF-16 and sql is
// non-empty, each present byte offset is additionally translated into a
// UTF-16 code-unit offset against sql.
func diagnosticFromProto(sql string, unit string, diag *enginepb.SqlDiagnostic) diagnosticWire {
        wire := diagnosticWire{
                Line:      diag.GetLine(),
                Column:    diag.GetColumn(),
                Message:   diag.GetMessage(),
                Severity:  diag.GetSeverity(),
                EndLine:   diag.GetEndLine(),
                EndColumn: diag.GetEndColumn(),
                StartByte: int32Ptr(diag.GetStartByte()),
                EndByte:   int32Ptr(diag.GetEndByte()),
        }
        if normalizeOffsetUnit(unit) != offsetUnitUTF16 || sql == "" {
                return wire
        }
        if start := wire.StartByte; start != nil {
                units := utf8ByteOffsetToCodeUnit(sql, int(*start))
                wire.StartUTF16 = int32Ptr(int32FromInt(units))
        }
        if end := wire.EndByte; end != nil {
                units := utf8ByteOffsetToCodeUnit(sql, int(*end))
                wire.EndUTF16 = int32Ptr(int32FromInt(units))
        }
        return wire
}

// tokenWire is the JSON form of one engine token, as built by
// tokenFromProto.
type tokenWire struct {
        Kind       string `json:"kind"`
        Image      string `json:"image"`
        // StartByte and EndByte are copied from the engine token.
        StartByte  int32  `json:"startByte"`
        EndByte    int32  `json:"endByte"`
        // StartUTF16 and EndUTF16 are only populated when the request asked
        // for UTF-16 offsets and the SQL text is non-empty.
        StartUTF16 *int32 `json:"startUtf16,omitempty"`
        EndUTF16   *int32 `json:"endUtf16,omitempty"`
}

// tokenFromProto converts an engine token into its wire form. Kind,
// image and byte offsets are copied over. When unit normalizes to
// UTF-16 and sql is non-empty, both byte offsets are additionally
// translated into UTF-16 code-unit offsets against sql.
func tokenFromProto(sql string, unit string, tok *enginepb.SqlToken) tokenWire {
        wire := tokenWire{
                Kind:      tok.GetKind(),
                Image:     tok.GetImage(),
                StartByte: tok.GetStartByte(),
                EndByte:   tok.GetEndByte(),
        }
        if normalizeOffsetUnit(unit) != offsetUnitUTF16 || sql == "" {
                return wire
        }
        startUnits := utf8ByteOffsetToCodeUnit(sql, int(wire.StartByte))
        wire.StartUTF16 = int32Ptr(int32FromInt(startUnits))
        endUnits := utf8ByteOffsetToCodeUnit(sql, int(wire.EndByte))
        wire.EndUTF16 = int32Ptr(int32FromInt(endUnits))
        return wire
}

// offsetRequest is the field set shared by request bodies that let the
// client choose an offset unit; it is embedded into the per-endpoint
// request structs.
type offsetRequest struct {
        // OffsetUnit is passed through normalizeOffsetUnit: "utf16"
        // (trimmed, case-insensitive) selects UTF-16 code units and any
        // other value, including empty, selects UTF-8 bytes.
        OffsetUnit string `json:"offsetUnit"`
}

// Package storagetmpl materializes an initial-data template tree into
// the persistent storage root the engine reads on startup.
//
// An operator (or a
// container image) ships a pre-populated catalog plus row files, and
// the emulator should copy them into the runtime data-dir on first
// boot so a downstream client sees a populated catalog at t=0. Once
// the data-dir is initialized, subsequent boots are no-ops -- the
// template is only ever copied into an empty (or absent) destination
// so we never clobber writes a previous run committed.
//
// "Initialized" detection. This repo's engine uses DuckDB, which
// keeps its catalog in a single file named `catalog.duckdb` plus
// sidecar `.parquet` / `.meta.json` files (see
// backend/storage/duckdb/duckdb_storage.cc). The presence of
// `catalog.duckdb` is therefore our sentinel: when it already
// exists in the destination we leave the tree alone and assume a
// prior run owns it. When the destination is absent OR exists but
// does not yet contain a `catalog.duckdb` file, we treat it as a
// fresh data-dir and copy the entire template tree in.
package storagetmpl

import (
        "errors"
        "fmt"
        "io"
        "io/fs"
        "os"
        "path/filepath"
)

// catalogSentinel is the file whose presence in the destination
// data-dir means the engine has already initialized this catalog.
// It is an unexported package-level var (rather than a const) so
// in-package tests can swap it for a storage-backend-specific
// sentinel without rewriting the helper.
var catalogSentinel = "catalog.duckdb"

// MaybeMaterialize seeds `dataDir` from `template` unless `dataDir`
// already holds the engine's initialized-catalog sentinel.
//
// It is a no-op (returns nil) when:
//   - template is "" (no initial-data-dir was configured);
//   - dataDir is "" (there is no on-disk tree to seed);
//   - dataDir already contains the sentinel file, meaning an earlier
//     run owns the catalog and a copy would clobber its writes.
//
// It returns an error when:
//   - template cannot be stat'ed or is not a directory;
//   - dataDir exists but is not a directory, or cannot be stat'ed for
//     a reason other than "does not exist";
//   - dataDir cannot be created, or any part of the copy fails.
//
// A missing dataDir is created with mode 0o750 before copying. File
// permission bits are carried over from the template; ownership and
// timestamps are not.
func MaybeMaterialize(template, dataDir string) error {
        if template == "" || dataDir == "" {
                return nil
        }

        tplInfo, tplErr := os.Stat(template)
        if tplErr != nil {
                return fmt.Errorf("storagetmpl: stat template %q: %w", template, tplErr)
        }
        if !tplInfo.IsDir() {
                return fmt.Errorf("storagetmpl: template %q is not a directory", template)
        }

        dstInfo, dstErr := os.Stat(dataDir)
        if dstErr != nil {
                // Anything other than "absent" is a real failure.
                if !errors.Is(dstErr, fs.ErrNotExist) {
                        return fmt.Errorf("storagetmpl: stat data-dir %q: %w", dataDir, dstErr)
                }
                if mkErr := os.MkdirAll(dataDir, 0o750); mkErr != nil {
                        return fmt.Errorf("storagetmpl: create data-dir %q: %w", dataDir, mkErr)
                }
        } else {
                if !dstInfo.IsDir() {
                        return fmt.Errorf("storagetmpl: data-dir %q exists but is not a directory", dataDir)
                }
                // The sentinel marks a continuation of a previous run, so
                // the existing tree is left untouched.
                if isInitialized(dataDir) {
                        return nil
                }
        }

        if copyErr := copyTree(template, dataDir); copyErr != nil {
                return fmt.Errorf("storagetmpl: copy %q -> %q: %w", template, dataDir, copyErr)
        }
        return nil
}

// isInitialized reports whether dataDir holds the catalog sentinel as
// a non-directory entry. Any stat failure counts as "not initialized",
// so a permission problem lets the copy run and surface a concrete
// error instead of being skipped silently.
func isInitialized(dataDir string) bool {
        sentinel := filepath.Join(dataDir, catalogSentinel)
        info, err := os.Stat(sentinel)
        if err != nil {
                return false
        }
        return !info.IsDir()
}

// copyTree mirrors the layout of src under dst. Destination files that
// already exist are overwritten; callers gate the whole call on
// `isInitialized`, so reseeding requires the operator to remove
// `catalog.duckdb` first.
//
// Per entry:
//   - directories are created with the source directory's permission
//     bits;
//   - symlinks are never reproduced as links (that avoids escapes
//     outside dst): a link to a file is copied by content, and for a
//     link to a directory only the directory itself is created here;
//   - everything else is copied by content with the source permission
//     bits.
//
// The first error aborts the walk and is returned.
func copyTree(src, dst string) error {
        visit := func(path string, entry fs.DirEntry, walkErr error) error {
                if walkErr != nil {
                        return walkErr
                }
                rel, relErr := filepath.Rel(src, path)
                if relErr != nil {
                        return relErr
                }
                target := filepath.Join(dst, rel)

                switch {
                case entry.IsDir():
                        dirInfo, err := entry.Info()
                        if err != nil {
                                return err
                        }
                        return os.MkdirAll(target, dirInfo.Mode().Perm())

                case entry.Type()&fs.ModeSymlink != 0:
                        linkDest, err := os.Readlink(path)
                        if err != nil {
                                return err
                        }
                        // Relative link targets are resolved against the
                        // directory that contains the link.
                        if !filepath.IsAbs(linkDest) {
                                linkDest = filepath.Join(filepath.Dir(path), linkDest)
                        }
                        destInfo, err := os.Stat(linkDest)
                        if err != nil {
                                return err
                        }
                        perm := destInfo.Mode().Perm()
                        if destInfo.IsDir() {
                                return os.MkdirAll(target, perm)
                        }
                        return copyFile(linkDest, target, perm)

                default:
                        fileInfo, err := entry.Info()
                        if err != nil {
                                return err
                        }
                        return copyFile(path, target, fileInfo.Mode().Perm())
                }
        }
        return filepath.WalkDir(src, visit)
}

// copyFile streams the content of src into dst, creating dst with the
// given mode when it does not exist and truncating it when it does.
// The parent directory of dst is created on demand (mode 0o750), so a
// deeply nested template works without pre-created directories.
//
// Both paths derive from an operator-supplied template tree walked by
// filepath.WalkDir; opening operator-named paths is the purpose of
// this helper, so the gosec findings on the variable paths are
// suppressed inline.
//
// Returns the first error from creating the parent, opening either
// file, copying, or closing the destination.
func copyFile(src, dst string, mode fs.FileMode) error {
        if err := os.MkdirAll(filepath.Dir(dst), 0o750); err != nil {
                return err
        }
        reader, err := os.Open(src) //nolint:gosec // src walks an operator-supplied template tree
        if err != nil {
                return err
        }
        defer func() { _ = reader.Close() }()

        // O_TRUNC: a partially-copied destination left by a crashed boot
        // must not be merged with the new content.
        const openFlags = os.O_WRONLY | os.O_CREATE | os.O_TRUNC
        writer, err := os.OpenFile(dst, openFlags, mode) //nolint:gosec // dst is the operator-supplied data-dir
        if err != nil {
                return err
        }
        _, copyErr := io.Copy(writer, reader)
        closeErr := writer.Close()
        if copyErr != nil {
                // The copy failure takes precedence over any close error.
                return copyErr
        }
        return closeErr
}

// Command check-disposition-parity verifies that the per-node
// disposition table the engine consumes
// (`backend/engine/duckdb/transpiler/node_dispositions.yaml`)
// agrees row-for-row with the human-readable mirror in
// `backend/engine/duckdb/transpiler/SHAPE_TRACKER.md`.
//
// The two files MUST stay in lock-step: the YAML is the
// machine-readable source of truth the engine router and the
// generated `node_dispositions_table.inc` consume; the markdown is
// the human-readable mirror referenced from ROADMAP.md and the
// per-plan docs. A drift between them means either the engine and
// the docs disagree on what route a node kind takes, or the
// `(planned)` annotation in the docs has stopped tracking reality.
//
// Usage (typical):
//
//        go run ./tools/check_disposition_parity        # check repo files
//        go run ./tools/check_disposition_parity \
//            --yaml=path/to/node_dispositions.yaml \
//            --shape-tracker=path/to/SHAPE_TRACKER.md
//
// Exit codes mirror the Go `flag` package convention:
//
//   - 0: tables agree
//   - 1: drift detected (the report names every offending row)
//   - 2: usage error (missing or unreadable input file)
//
// The deliberately-thin CLI surface keeps the checker drop-in
// runnable from `task lint:dispositions` (the canonical
// developer-facing entry point) and from the CI lint job, with no
// extra flags needed for the common case.
package main

import (
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
        "path/filepath"
)

// Slash-separated default input paths. resolvePaths converts them with
// filepath.FromSlash and uses them when the matching flag is empty, so
// they resolve against the current working directory.
const (
        defaultYAMLRel         = "backend/engine/duckdb/transpiler/node_dispositions.yaml"
        defaultShapeTrackerRel = "backend/engine/duckdb/transpiler/SHAPE_TRACKER.md"
)

// Sentinel errors returned by run; main maps them to exit codes.
var (
        // errUsage marks bad flags, stray positional args, or unreadable
        // input paths; main exits with status 2.
        errUsage    = errors.New("usage error")
        // errFindings marks detected drift between the two tables; main
        // exits with status 1.
        errFindings = errors.New("parity findings")
)

// main runs the checker against the process arguments and maps the
// outcome to an exit status: 0 on success, 2 for usage errors, 1 for
// parity findings and for any other failure. Only the "other failure"
// case is printed here; run has already reported the first two.
func main() {
        err := run(os.Args[1:], os.Stdout, os.Stderr)
        if err == nil {
                return
        }
        if errors.Is(err, errUsage) {
                os.Exit(2)
        }
        if !errors.Is(err, errFindings) {
                _, _ = fmt.Fprintf(os.Stderr, "check-disposition-parity: %v\n", err)
        }
        os.Exit(1)
}

// run is the testable entry point: it reports problems through its
// return value instead of calling os.Exit, so tests can drive the
// whole flow with fixtures.
//
// Parameters:
//   - args: command-line arguments without the program name.
//   - stdout: receives the success summary.
//   - stderr: receives flag errors, usage diagnostics and findings.
//
// Returns nil when the tables agree, errUsage for flag / argument /
// path problems, errFindings when drift was reported, and a wrapped
// error when an input cannot be read or parsed.
func run(args []string, stdout, stderr io.Writer) error {
        flags := flag.NewFlagSet("check-disposition-parity", flag.ContinueOnError)
        flags.SetOutput(stderr)
        yamlFlag := flags.String("yaml", "",
                "Path to node_dispositions.yaml (defaults to the repo-relative "+
                        "path under the current working directory).")
        shapeFlag := flags.String("shape-tracker", "",
                "Path to SHAPE_TRACKER.md (defaults to the repo-relative path "+
                        "under the current working directory).")
        if parseErr := flags.Parse(args); parseErr != nil {
                return errUsage
        }
        if flags.NArg() > 0 {
                _, _ = fmt.Fprintf(stderr,
                        "check-disposition-parity: unexpected positional args: %v\n",
                        flags.Args())
                return errUsage
        }

        yamlFile, shapeFile, err := resolvePaths(*yamlFlag, *shapeFlag)
        if err != nil {
                _, _ = fmt.Fprintf(stderr, "check-disposition-parity: %v\n", err)
                return errUsage
        }

        // gosec G304: the paths are user-controlled by design (they come
        // from the --yaml / --shape-tracker flags), matching the rest of
        // the in-repo lint tooling.
        yamlSrc, err := os.ReadFile(yamlFile) //nolint:gosec // user-controlled lint input
        if err != nil {
                return fmt.Errorf("read %s: %w", yamlFile, err)
        }
        shapeSrc, err := os.ReadFile(shapeFile) //nolint:gosec // user-controlled lint input
        if err != nil {
                return fmt.Errorf("read %s: %w", shapeFile, err)
        }

        yamlRows, err := parseYAML(string(yamlSrc))
        if err != nil {
                return fmt.Errorf("parse %s: %w", yamlFile, err)
        }
        shapeRows, err := parseShapeTracker(string(shapeSrc))
        if err != nil {
                return fmt.Errorf("parse %s: %w", shapeFile, err)
        }

        findings := compareParity(yamlRows, shapeRows)
        if len(findings) > 0 {
                for _, finding := range findings {
                        _, _ = fmt.Fprintln(stderr, finding)
                }
                _, _ = fmt.Fprintf(stderr,
                        "check-disposition-parity: %d disagreement(s) between %s and %s\n",
                        len(findings), yamlFile, shapeFile)
                return errFindings
        }
        _, _ = fmt.Fprintf(stdout,
                "check-disposition-parity: ok (%d YAML rows, %d SHAPE_TRACKER rows)\n",
                len(yamlRows), len(shapeRows))
        return nil
}

// resolvePaths picks the two input paths. An empty flag value falls
// back to the well-known repo-relative default, which resolves against
// the current working directory (callers run from the repo root by
// convention, via `task lint:dispositions`). Both resulting paths must
// be stat-able; otherwise empty paths and an error naming the
// offending flag are returned.
func resolvePaths(yamlFlag, shapeFlag string) (yp, sp string, err error) {
        yamlPath, shapePath := yamlFlag, shapeFlag
        if yamlPath == "" {
                yamlPath = filepath.FromSlash(defaultYAMLRel)
        }
        if shapePath == "" {
                shapePath = filepath.FromSlash(defaultShapeTrackerRel)
        }

        _, yamlStatErr := os.Stat(yamlPath)
        if yamlStatErr != nil {
                return "", "", fmt.Errorf("--yaml path %q is not readable: %w", yamlPath, yamlStatErr)
        }
        _, shapeStatErr := os.Stat(shapePath)
        if shapeStatErr != nil {
                return "", "", fmt.Errorf("--shape-tracker path %q is not readable: %w", shapePath, shapeStatErr)
        }
        return yamlPath, shapePath, nil
}

package main

import (
        "fmt"
        "sort"
        "strings"
)

// Route name constants. Shared with
// `backend/engine/disposition.h`, `node_dispositions.yaml`, and
// `SHAPE_TRACKER.md`. Adding a new route means editing the enum,
// both data files, the generators, and this file in lock-step.
//
// Each value is the exact lowercase token that parseYAML accepts as
// a disposition word; the same set is mirrored in validDispositions.
const (
        routeDuckdbNative     = "duckdb_native"
        routeDuckdbRewrite    = "duckdb_rewrite"
        routeDuckdbUDF        = "duckdb_udf"
        routeSemanticExecutor = "semantic_executor"
        routeControlOp        = "control_op"
        routeLocalStub        = "local_stub"
        routeUnsupported      = "unsupported"
)

// validDispositions is the closed set of route names, keyed by the
// route constants. parseYAML rejects any disposition token that is
// not a key of this map.
var validDispositions = map[string]struct{}{
        routeDuckdbNative:     {},
        routeDuckdbRewrite:    {},
        routeDuckdbUDF:        {},
        routeSemanticExecutor: {},
        routeControlOp:        {},
        routeLocalStub:        {},
        routeUnsupported:      {},
}

// yamlRow is a single entry from node_dispositions.yaml, as produced
// by parseYAML.
type yamlRow struct {
        // node is the GoogleSQL ResolvedAST class name (no backticks,
        // case-sensitive). It is the trimmed text before the first `:`.
        node string
        // disposition is the lowercase route name (one of
        // validDispositions).
        disposition string
        // lineNumber is the 1-based source YAML line, kept for
        // diagnostics.
        lineNumber int
}

// shapeRow is a single row from the SHAPE_TRACKER.md tables, as
// produced by parseShapeTracker.
type shapeRow struct {
        // nodes is the list of node-kind tokens the row's `Node` cell
        // covered (composite cells like `Foo / Bar` and wildcards like
        // `ResolvedGraph*Scan` expand into multiple tokens; wildcards
        // keep their `*` and are matched against the YAML by
        // matchesWildcard below).
        nodes []string
        // disposition is the lowercase route name from the row's
        // `Status` cell (one of validDispositions). Any suffix the
        // status cell carried (e.g. `(subset)`) is stripped.
        disposition string
        // lineNumber is the 1-based source markdown line, kept for
        // diagnostics.
        lineNumber int
}

// parseYAML reads node_dispositions.yaml line by line.
//
// A narrow hand-written reader is used instead of gopkg.in/yaml.v3 so
// the parity check keeps the same "no extra deps to bootstrap"
// property as the awk-based table generator. Each meaningful line
// follows the grammar:
//
//        <NodeKind>: <disposition> [status=planned]
//
// Only `<NodeKind>` and `<disposition>` are kept; trailing metadata
// matters to the generator and runtime callers, not to parity.
// Inline comments and blank lines are skipped.
//
// Returns the rows in file order, or an error naming the 1-based line
// for a missing `:`, an empty key, a missing disposition, or a
// disposition outside validDispositions.
func parseYAML(src string) ([]yamlRow, error) {
        var rows []yamlRow
        for idx, raw := range strings.Split(src, "\n") {
                lineNumber := idx + 1
                content := strings.TrimSpace(stripInlineComment(raw))
                if content == "" {
                        continue
                }
                keyPart, valuePart, found := strings.Cut(content, ":")
                if !found {
                        return nil, fmt.Errorf("line %d: missing `:` separator: %q",
                                lineNumber, raw)
                }
                node := strings.TrimSpace(keyPart)
                if node == "" {
                        return nil, fmt.Errorf("line %d: empty node-kind key: %q",
                                lineNumber, raw)
                }
                // Fields ignores surrounding whitespace, so the value needs
                // no separate trim.
                words := strings.Fields(valuePart)
                if len(words) == 0 {
                        return nil, fmt.Errorf("line %d: missing disposition for %q",
                                lineNumber, node)
                }
                route := words[0]
                if _, known := validDispositions[route]; !known {
                        return nil, fmt.Errorf(
                                "line %d: unknown disposition %q for node %q",
                                lineNumber, route, node)
                }
                rows = append(rows, yamlRow{
                        node:        node,
                        disposition: route,
                        lineNumber:  lineNumber,
                })
        }
        return rows, nil
}

// stripInlineComment cuts a trailing `# ...` off a YAML line. The
// definition of "inline comment" is deliberately narrow: the first `#`
// that sits at column 0 or directly after a space or tab starts the
// comment. A `#` glued to preceding text is kept. That suffices for
// the disposition tables (which never put `#` inside a node name or a
// disposition word) and keeps the parser tiny.
func stripInlineComment(s string) string {
        searchFrom := 0
        for {
                rel := strings.IndexByte(s[searchFrom:], '#')
                if rel < 0 {
                        return s
                }
                hash := searchFrom + rel
                if hash == 0 || s[hash-1] == ' ' || s[hash-1] == '\t' {
                        return s[:hash]
                }
                searchFrom = hash + 1
        }
}

// parseShapeTracker collects the per-node disposition rows from the
// pipe tables in SHAPE_TRACKER.md.
//
// A line contributes a row only when it starts with `|`, contains no
// `---` (which drops table separator lines), splits into at least two
// cells, and its first cell yields at least one node token from
// extractNodes. That last filter is what drops header rows (first cell
// `Node`) and other non-node rows.
//
// The second cell is handed to extractDisposition; its error, prefixed
// with the 1-based line number, aborts parsing.
func parseShapeTracker(src string) ([]shapeRow, error) {
        var rows []shapeRow
        for idx, raw := range strings.Split(src, "\n") {
                trimmed := strings.TrimSpace(raw)
                if !strings.HasPrefix(trimmed, "|") || strings.Contains(trimmed, "---") {
                        continue
                }
                cells := splitMarkdownRow(trimmed)
                if len(cells) < 2 {
                        continue
                }
                nodes := extractNodes(strings.TrimSpace(cells[0]))
                if len(nodes) == 0 {
                        continue
                }
                route, err := extractDisposition(strings.TrimSpace(cells[1]))
                if err != nil {
                        return nil, fmt.Errorf("line %d: %w", idx+1, err)
                }
                rows = append(rows, shapeRow{
                        nodes:       nodes,
                        disposition: route,
                        lineNumber:  idx + 1,
                })
        }
        return rows, nil
}

// splitMarkdownRow breaks a `| a | b | c |` pipe-table row into its
// cells. At most one leading and one trailing `|` are removed before
// the remainder is split on every `|`; cells are returned untrimmed.
// Some markdown dialects allow `|` inside backticks, but
// SHAPE_TRACKER.md never does, so a plain split is used instead of a
// real markdown parser.
func splitMarkdownRow(line string) []string {
        if strings.HasPrefix(line, "|") {
                line = line[1:]
        }
        if strings.HasSuffix(line, "|") {
                line = line[:len(line)-1]
        }
        return strings.Split(line, "|")
}

// extractNodes returns the class identifiers named in a `Node` cell.
// The cell is split on `/`, and each fragment is stripped of
// surrounding whitespace and backticks. Recognised forms:
//
//   - a single class name, e.g. `ResolvedQueryStmt`
//   - a slash-joined composite row, e.g. `ResolvedFoo` / `ResolvedBar`
//   - a wildcard family, e.g. `ResolvedGraph*Scan` (a markdown-escaped
//     `\*` is first unescaped to `*`)
//
// Empty fragments are ignored. If any non-empty fragment does not
// start with `Resolved` (for example the header row's literal "Node"
// label), the whole cell is rejected and nil is returned so the
// caller skips the row.
func extractNodes(cell string) []string {
        var names []string
        unescaped := strings.ReplaceAll(cell, "\\*", "*")
        for _, piece := range strings.Split(unescaped, "/") {
                ident := strings.TrimSpace(strings.Trim(strings.TrimSpace(piece), "`"))
                switch {
                case ident == "":
                        // Nothing between separators; keep scanning.
                case strings.HasPrefix(ident, "Resolved"):
                        names = append(names, ident)
                default:
                        // One non-identifier fragment disqualifies the row.
                        return nil
                }
        }
        return names
}

// extractDisposition returns the canonical route name found in a
// Status cell. Typical cells look like:
//
//        `duckdb_native`
//        `duckdb_native` (subset)
//        `duckdb_native` (planned)
//
// Only the text between the first pair of backticks is used, so
// trailing annotations such as `(subset)` or `(planned)` are ignored.
// An error is returned when the cell has no backtick, when the first
// backtick is never closed, or when the extracted word is not a key
// of validDispositions.
func extractDisposition(cell string) (string, error) {
        _, afterOpen, opened := strings.Cut(cell, "`")
        if !opened {
                return "", fmt.Errorf("status cell has no backticked disposition: %q", cell)
        }
        quoted, _, terminated := strings.Cut(afterOpen, "`")
        if !terminated {
                return "", fmt.Errorf("unterminated backtick in status cell: %q", cell)
        }
        word := strings.TrimSpace(quoted)
        if _, known := validDispositions[word]; !known {
                return "", fmt.Errorf("unknown disposition %q in status cell: %q",
                        word, cell)
        }
        return word, nil
}

// compareParity cross-checks the YAML disposition rows against the
// SHAPE_TRACKER rows and returns the findings as a sorted list; the
// list is empty when the two sources agree. Three kinds of finding
// are produced:
//
//  1. A SHAPE_TRACKER node has no matching YAML row.
//  2. A SHAPE_TRACKER node's disposition disagrees with the YAML row.
//  3. A YAML node has no matching SHAPE_TRACKER row.
//
// SHAPE_TRACKER wildcards (`ResolvedGraph*Scan`) match every YAML
// node whose name fits the pattern, and every matched YAML row must
// share the wildcard's disposition.
func compareParity(yaml []yamlRow, shape []shapeRow) []string {
        // Index the YAML rows by node name for the exact-match lookups.
        byName := make(map[string]yamlRow, len(yaml))
        for _, entry := range yaml {
                byName[entry.node] = entry
        }

        // matched records every YAML node that some SHAPE_TRACKER row
        // (exact or wildcard) accounted for.
        matched := make(map[string]bool, len(yaml))
        var findings []string
        for _, sr := range shape {
                findings = append(findings, walkShapeRow(sr, yaml, byName, matched)...)
        }

        // Whatever is still unmatched exists only on the YAML side.
        for _, entry := range yaml {
                if !matched[entry.node] {
                        findings = append(findings, fmt.Sprintf(
                                "node_dispositions.yaml line %d has %s -> %s but "+
                                        "SHAPE_TRACKER.md has no matching row",
                                entry.lineNumber, entry.node, entry.disposition))
                }
        }
        sort.Strings(findings)
        return findings
}

// walkShapeRow checks every node token of one SHAPE_TRACKER row
// against the YAML rows and returns the resulting findings. Tokens
// containing `*` are delegated to expandWildcard; all other tokens
// are looked up by exact name in yamlByName. Each YAML row that is
// matched is recorded in `seen`. Kept separate from compareParity so
// that function stays under the funlen linter cap and so the exact
// and wildcard branches can be tested in isolation.
func walkShapeRow(
        row shapeRow,
        yaml []yamlRow,
        yamlByName map[string]yamlRow,
        seen map[string]bool,
) []string {
        var findings []string
        for _, name := range row.nodes {
                if strings.Contains(name, "*") {
                        findings = append(findings, expandWildcard(row, name, yaml, seen)...)
                        continue
                }
                match, found := yamlByName[name]
                if found {
                        seen[match.node] = true
                        if match.disposition != row.disposition {
                                findings = append(findings, mismatchFinding(row, match, name))
                        }
                        continue
                }
                findings = append(findings, fmt.Sprintf(
                        "SHAPE_TRACKER.md line %d references %s but "+
                                "node_dispositions.yaml has no matching row",
                        row.lineNumber, name))
        }
        return findings
}

// expandWildcard resolves a `Foo*Bar`-style SHAPE_TRACKER token
// against every YAML row. Each YAML row whose node name matches is
// marked in `seen`, and a mismatch finding is emitted when its
// disposition differs from the wildcard row's. A wildcard that
// matches no YAML row at all yields a single "no matching row"
// finding.
func expandWildcard(
        row shapeRow,
        token string,
        yaml []yamlRow,
        seen map[string]bool,
) []string {
        var findings []string
        hits := 0
        for _, candidate := range yaml {
                if matchesWildcard(token, candidate.node) {
                        hits++
                        seen[candidate.node] = true
                        if candidate.disposition != row.disposition {
                                findings = append(findings, mismatchFinding(row, candidate, candidate.node))
                        }
                }
        }
        if hits == 0 {
                findings = append(findings, fmt.Sprintf(
                        "SHAPE_TRACKER.md line %d references wildcard %s "+
                                "but node_dispositions.yaml has no matching row",
                        row.lineNumber, token))
        }
        return findings
}

// mismatchFinding formats the one canonical "disposition mismatch"
// message. displayName is the node name shown on the SHAPE_TRACKER
// side; the YAML side always shows yr.node. Both walkShapeRow and
// expandWildcard go through this helper so their wording cannot
// drift apart.
func mismatchFinding(row shapeRow, yr yamlRow, displayName string) string {
        const layout = "disposition mismatch: SHAPE_TRACKER.md line %d says %s -> %s, " +
                "node_dispositions.yaml line %d says %s -> %s"
        return fmt.Sprintf(layout,
                row.lineNumber, displayName, row.disposition,
                yr.lineNumber, yr.node, yr.disposition)
}

// matchesWildcard reports whether `name` fits the glob-style
// `pattern`. The only metacharacter is `*`, which stands for any
// (possibly empty) run of characters; that is the only form
// SHAPE_TRACKER uses (e.g. `ResolvedGraph*Scan`). A pattern without
// `*` must equal the name exactly.
func matchesWildcard(pattern, name string) bool {
        segments := strings.Split(pattern, "*")
        if len(segments) == 1 {
                // No `*` present: plain equality.
                return pattern == name
        }
        head, tail := segments[0], segments[len(segments)-1]
        if !strings.HasPrefix(name, head) {
                return false
        }
        remaining := name[len(head):]
        // Every interior literal must occur, in order, after the
        // previous one; the leftmost occurrence is consumed each time.
        for _, mid := range segments[1 : len(segments)-1] {
                at := strings.Index(remaining, mid)
                if at < 0 {
                        return false
                }
                remaining = remaining[at+len(mid):]
        }
        // The final literal is anchored to the end of the name.
        return strings.HasSuffix(remaining, tail)
}

package main

import (
        "encoding/json"
        "fmt"
        "io"
        "os"
        "strconv"
)

// badgeJSON matches the shape shields.io's "endpoint" badge consumer
// requires. See https://shields.io/badges/endpoint-badge for the
// schema. We deliberately keep this struct narrow (no `cacheSeconds`
// override etc) because the README badges use shields.io's defaults
// and we don't want the badge JSON to grow extra knobs without a
// reason.
type badgeJSON struct {
        // SchemaVersion is serialized as "schemaVersion"; runBadge always
        // sets it to 1.
        SchemaVersion int    `json:"schemaVersion"`
        // Label is the badge's label text, taken from the --label flag.
        Label         string `json:"label"`
        // Message is the badge's value text, produced by badgeMessage.
        Message       string `json:"message"`
        // Color is the shields.io colour name, produced by badgeColor.
        Color         string `json:"color"`
}

// Color thresholds for the badges. These match the colour bands
// codecov uses on its default badges to keep the visual transition
// from the old badges to the new ones smooth.
//
// The trailing comment on each constant is the percentage band that
// badgeColor maps to it. badgeColor applies no upper bound, so any
// value of 90 or more (including values above 100) is brightgreen,
// and any negative value is lightgrey.
const (
        colorBrightGreen = "brightgreen" // [90, 100]
        colorGreen       = "green"       // [80, 90)
        colorYellowGreen = "yellowgreen" // [70, 80)
        colorYellow      = "yellow"      // [60, 70)
        colorOrange      = "orange"      // [40, 60)
        colorRed         = "red"         // [0, 40)
        colorLightgrey   = "lightgrey"   // missing data
)

// badgeColor picks the shields.io colour name for a percentage. Any
// negative value (such as the -1 missingFlag sentinel) yields
// "lightgrey", so a flag with no data renders as a clearly greyed-out
// badge rather than a "0% red" badge that would imply the suite ran
// and failed. Otherwise the first band whose lower bound the value
// reaches wins, and anything below the lowest bound is red.
func badgeColor(pct float64) string {
        if pct < 0 {
                return colorLightgrey
        }
        // Bands are listed from the highest lower bound to the lowest so
        // the first match is the correct one.
        bands := [...]struct {
                atLeast float64
                color   string
        }{
                {90, colorBrightGreen},
                {80, colorGreen},
                {70, colorYellowGreen},
                {60, colorYellow},
                {40, colorOrange},
        }
        for _, band := range bands {
                if pct >= band.atLeast {
                        return band.color
                }
        }
        return colorRed
}

// badgeMessage formats a percentage for the badge's message text:
// one decimal place followed by `%` (for example "71.3%"), which
// keeps the tight format of the codecov badges being replaced. A
// negative value is the missing-data sentinel and renders as
// missingMessage instead.
func badgeMessage(pct float64) string {
        if pct < 0 {
                return missingMessage
        }
        digits := strconv.AppendFloat(nil, pct, 'f', 1, 64)
        return string(append(digits, '%'))
}

// pickField returns the percentage stored in the Summary field named
// by the --field flag value ("total", "go" or "cpp"). Any other name
// is an error, so a typo in the workflow cannot silently fall back to
// the total.
func pickField(s *Summary, field string) (float64, error) {
        var pct float64
        switch field {
        case "total":
                pct = s.Total
        case "go":
                pct = s.Go
        case "cpp":
                pct = s.CPP
        default:
                return 0, fmt.Errorf("unknown --field %q (want one of: total, go, cpp)", field)
        }
        return pct, nil
}

// runBadge implements `coverage badge`. It parses the subcommand's
// flags, loads the summary.json named by --in (as previously written
// by `coverage summarize`), selects the percentage named by --field,
// and emits the shields.io endpoint JSON to --out, or to stdout when
// --out is empty.
//
// A missing --in prints a message and the flag usage to stderr and
// returns errUsage. Flag-parse, read and field-selection errors are
// returned unchanged.
func runBadge(args []string, stdout, stderr io.Writer) error {
        flags := flagSet("badge", stderr)
        var (
                inPath  = flags.String("in", "", "input summary.json path (required)")
                outPath = flags.String("out", "", "output badge JSON path (default: stdout)")
                field   = flags.String("field", "total", "summary field to render (total|go|cpp)")
                label   = flags.String("label", "coverage", "badge label")
        )
        if err := flags.Parse(args); err != nil {
                return err
        }
        if *inPath == "" {
                _, _ = fmt.Fprintln(stderr, "badge: --in is required")
                flags.Usage()
                return errUsage
        }

        summary, err := readSummary(*inPath)
        if err != nil {
                return err
        }
        pct, err := pickField(summary, *field)
        if err != nil {
                return err
        }

        return emitBadge(&badgeJSON{
                SchemaVersion: 1,
                Label:         *label,
                Message:       badgeMessage(pct),
                Color:         badgeColor(pct),
        }, *outPath, stdout)
}

// emitBadge serializes b as two-space-indented JSON followed by a
// newline. When outPath is empty the bytes are written to stdout;
// otherwise they are written to the file at outPath with mode 0o644.
// Marshal and file-write failures are wrapped with context; a stdout
// write error is returned as-is.
func emitBadge(b *badgeJSON, outPath string, stdout io.Writer) error {
        encoded, err := json.MarshalIndent(b, "", "  ")
        if err != nil {
                return fmt.Errorf("marshal badge: %w", err)
        }
        encoded = append(encoded, '\n')

        if outPath != "" {
                //nolint:gosec // 0o644 is the right mode for a CI-published JSON artifact.
                if werr := os.WriteFile(outPath, encoded, 0o644); werr != nil {
                        return fmt.Errorf("write %q: %w", outPath, werr)
                }
                return nil
        }
        _, err = stdout.Write(encoded)
        return err
}

package main

import (
        "errors"
        "fmt"
        "io"
        "os"
        "strings"
)

// errRegression is returned by runGate when one or more tracked
// fields fell below the configured tolerance. It causes the binary
// to exit with status 1 so CI marks the check as failed.
//
// runGate returns it when any gateResult reports failed(), which
// covers both a regression beyond tolerance and a value below the
// absolute floor. It is never returned when the baseline file is
// missing.
var errRegression = errors.New("coverage regression")

// gateResult is the per-field outcome the gate prints to stdout. We
// always print one row per tracked flag (total, go, cpp) so the GitHub
// step-summary table is consistent across runs, even when a flag
// happens to be missing in either the current or baseline summary.
//
// Values are filled in by evalField. When either missingCurr or
// missingBase is set, evalField returns early and regression,
// belowFloor and overTol keep their zero values.
type gateResult struct {
        // field is the tracked field's name ("total", "go" or "cpp").
        field       string
        // current and baseline are the percentages being compared; a
        // negative value means the data is missing.
        current     float64
        baseline    float64
        // tolerance is the maximum allowed regression, in percentage
        // points, and floor the absolute minimum (0 disables it).
        tolerance   float64
        floor       float64
        regression  float64 // baseline - current; positive means we went down
        // belowFloor is set when floor > 0 and current < floor.
        belowFloor  bool
        // overTol is set when regression > tolerance.
        overTol     bool
        // missingCurr / missingBase record that current / baseline was
        // negative, i.e. had no data.
        missingCurr bool
        missingBase bool
}

// failed reports whether this field should fail the gate: it either
// sits below its absolute floor or regressed past its tolerance.
func (r gateResult) failed() bool {
        if r.belowFloor {
                return true
        }
        return r.overTol
}

// formatPct renders a single percentage cell of the gate table with
// one decimal place and a trailing `%`. Negative values mark missing
// data and render as missingMessage, the same text the badge command
// uses, so the two outputs stay visually aligned.
func formatPct(v float64) string {
        if v < 0 {
                return missingMessage
        }
        return fmt.Sprintf("%.1f", v) + "%"
}

// runGate implements `coverage gate`. It parses the subcommand's
// flags, loads the current and baseline summaries, evaluates the
// total, go and cpp fields against their tolerance and floor, writes
// a markdown table of the results to stdout, and returns
// errRegression iff at least one field failed.
//
// If --baseline names a file that does not exist, loadBaseline
// reports the bootstrap case: a warning goes to stderr, the table is
// still printed, and the gate passes. This keeps the very first run
// green while gh-pages has not been populated yet.
//
// A missing --current or --baseline prints a message and the flag
// usage to stderr and returns errUsage.
//
//nolint:cyclop // straight-line flag handling; refactor would hurt readability.
func runGate(args []string, stdout, stderr io.Writer) error {
        flags := flagSet("gate", stderr)
        var (
                currentPath  = flags.String("current", "", "current summary.json path (required)")
                baselinePath = flags.String("baseline", "", "baseline summary.json path (required)")
                totalTol     = flags.Float64("tolerance", 1.0, "max allowed regression (percentage points) on the total field")
                totalFloor   = flags.Float64("floor", 0.0, "absolute floor (percentage points) on the total field; 0 disables")
                goTol        = flags.Float64("go-tolerance", 1.0, "max allowed regression on the go field")
                goFloor      = flags.Float64("go-floor", 0.0, "absolute floor on the go field")
                cppTol       = flags.Float64("cpp-tolerance", 1.0, "max allowed regression on the cpp field")
                cppFloor     = flags.Float64("cpp-floor", 0.0, "absolute floor on the cpp field")
        )
        if err := flags.Parse(args); err != nil {
                return err
        }
        if *currentPath == "" || *baselinePath == "" {
                _, _ = fmt.Fprintln(stderr, "gate: --current and --baseline are both required")
                flags.Usage()
                return errUsage
        }

        current, err := readSummary(*currentPath)
        if err != nil {
                return err
        }
        baseline, baselineMissing, err := loadBaseline(*baselinePath, stderr)
        if err != nil {
                return err
        }

        results := []gateResult{
                evalField("total", current.Total, baseline.Total, *totalTol, *totalFloor),
                evalField("go", current.Go, baseline.Go, *goTol, *goFloor),
                evalField("cpp", current.CPP, baseline.CPP, *cppTol, *cppFloor),
        }
        writeGateTable(stdout, results, baselineMissing)

        // Without a baseline there is nothing to gate against.
        if !baselineMissing {
                for _, res := range results {
                        if res.failed() {
                                return errRegression
                        }
                }
        }
        return nil
}

// loadBaseline reads the baseline summary at path and returns
// (summary, missing, err). A path that does not exist is the
// bootstrap case: a warning is written to stderr and the result is a
// Summary whose three fields are all missingFlag, with missing=true
// and no error. Any other stat, read or decode failure is returned
// so CI surfaces the real problem instead of silently passing.
func loadBaseline(path string, stderr io.Writer) (*Summary, bool, error) {
        _, statErr := os.Stat(path)
        switch {
        case errors.Is(statErr, os.ErrNotExist):
                _, _ = fmt.Fprintf(stderr, "gate: baseline %q does not exist; treating as bootstrap (no gate).\n", path)
                placeholder := &Summary{Total: missingFlag, Go: missingFlag, CPP: missingFlag}
                return placeholder, true, nil
        case statErr != nil:
                return nil, false, fmt.Errorf("stat baseline %q: %w", path, statErr)
        }

        summary, err := readSummary(path)
        if err != nil {
                return nil, false, err
        }
        return summary, false, nil
}

// evalField evaluates one tracked field and returns its gateResult.
// A negative current or baseline value means "missing"; in that case
// the field has no opinion and cannot fail the gate. This guards
// against, e.g., the C++ Bazel suite being temporarily disabled and
// the gate suddenly judging a flag that has no real data.
//
// With both values present, regression is baseline - current,
// overTol is set when the regression exceeds tol, and belowFloor is
// set when floor is positive and current is under it.
func evalField(name string, current, baseline, tol, floor float64) gateResult {
        res := gateResult{
                field:       name,
                current:     current,
                baseline:    baseline,
                tolerance:   tol,
                floor:       floor,
                missingCurr: current < 0,
                missingBase: baseline < 0,
        }
        if res.missingCurr || res.missingBase {
                return res
        }
        res.regression = baseline - current
        res.overTol = res.regression > tol
        res.belowFloor = floor > 0 && current < floor
        return res
}

// writeGateTable renders the gate report as markdown and writes it
// to w in a single call: a heading, an optional note when the
// baseline is missing, the table header, and one row per result
// (formatted by formatRow). runGate passes stdout here so the report
// can be appended to `$GITHUB_STEP_SUMMARY` via
// `coverage gate ... >> $GITHUB_STEP_SUMMARY`. Write errors are
// deliberately ignored.
func writeGateTable(w io.Writer, results []gateResult, baselineMissing bool) {
        pieces := make([]string, 0, len(results)+4)
        pieces = append(pieces, "## Coverage gate\n\n")
        if baselineMissing {
                pieces = append(pieces, "_Baseline missing; no regression gate enforced for this run._\n\n")
        }
        pieces = append(pieces,
                "| field | current | baseline | delta | tolerance | floor | status |\n",
                "|-------|---------|----------|-------|-----------|-------|--------|\n",
        )
        for _, res := range results {
                pieces = append(pieces, formatRow(res))
        }
        _, _ = fmt.Fprint(w, strings.Join(pieces, ""))
}

// formatRow renders one gateResult as a single markdown table row
// (terminated by a newline) with the columns: field, current,
// baseline, delta, tolerance, floor, status.
//
// The delta column is the signed change current - baseline, i.e. the
// negated regression, shown as "-" when either side is missing. The
// status column is "ok", "skipped (missing data)", or a "FAIL: ..."
// message; a tolerance failure takes precedence over a floor failure.
func formatRow(r gateResult) string {
        missing := r.missingCurr || r.missingBase

        delta := "-"
        if !missing {
                change := -r.regression
                if change == 0 {
                        // Negating a zero regression yields IEEE-754 negative
                        // zero, which %+.1f prints as "-0.0". Normalise it so an
                        // unchanged field reads "+0.0%" instead of looking like
                        // a drop.
                        change = 0
                }
                delta = fmt.Sprintf("%+.1f%%", change)
        }

        status := "ok"
        switch {
        case missing:
                status = "skipped (missing data)"
        case r.overTol:
                status = fmt.Sprintf("FAIL: regressed %.1fpp > %.1fpp tol", r.regression, r.tolerance)
        case r.belowFloor:
                status = fmt.Sprintf("FAIL: below floor %.1f%%", r.floor)
        }

        return fmt.Sprintf(
                "| %s | %s | %s | %s | %.1fpp | %.1f%% | %s |\n",
                r.field,
                formatPct(r.current),
                formatPct(r.baseline),
                delta,
                r.tolerance,
                r.floor,
                status,
        )
}

// Command coverage is the BigQuery emulator's self-hosted Codecov
// replacement. It ingests the two coverage artifacts produced by CI
// (Go `coverage.out` from `go test -coverprofile=...`, and the
// aggregated LCOV `.dat` from `bazel coverage --combined_report=lcov`),
// then emits the three pieces the gh-pages pipeline needs:
//
//  1. `summarize` writes a JSON summary with the overall percentage
//     plus per-flag (go, cpp) percentages.
//  2. `badge`     writes a shields.io endpoint JSON for one field of
//     that summary so the README badges can be rendered
//     dynamically from gh-pages without any external SaaS.
//  3. `gate`      compares the current summary against the baseline
//     published by the last `main` build and exits
//     non-zero if any tracked percentage regressed beyond
//     the configured tolerance or fell below an absolute
//     floor.
//
// The binary has no external dependencies on purpose: it ships as part
// of the repo, runs in any environment that has a Go toolchain, and is
// trivial for contributors to reproduce locally via `task coverage:*`.
package main

import (
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
)

// Subcommand names. Declared as constants so the dispatch switch in
// run() and the table-driven tests stay aligned (and so the goconst
// linter does not complain about the strings repeating across files).
const (
        // cmdSummarize makes run() dispatch to runSummarize.
        cmdSummarize = "summarize"
        // cmdBadge makes run() dispatch to runBadge.
        cmdBadge     = "badge"
        // cmdGate makes run() dispatch to runGate.
        cmdGate      = "gate"
)

// missingMessage is the literal rendered for missing-data percentages
// across the badge and gate subcommands. Centralised because the
// shields.io endpoint and the markdown step-summary share the same
// "absent value" convention.
//
// badgeMessage and formatPct both return it for any negative
// percentage.
const missingMessage = "n/a"

// run is the testable entry point of the binary. The first element
// of args selects the subcommand and the remaining elements are
// passed to it. Errors are returned rather than turned into
// os.Exit calls, so tests can drive the whole code path with
// table-driven fixtures without managing process lifetimes.
//
// With no arguments, or an unknown subcommand, the usage text goes
// to stderr and errUsage is returned. "-h", "--help" and "help"
// print the usage text to stdout and succeed.
func run(args []string, stdout, stderr io.Writer) error {
        if len(args) == 0 {
                usage(stderr)
                return errUsage
        }
        switch name, rest := args[0], args[1:]; name {
        case cmdSummarize:
                return runSummarize(rest, stdout, stderr)
        case cmdBadge:
                return runBadge(rest, stdout, stderr)
        case cmdGate:
                return runGate(rest, stdout, stderr)
        case "-h", "--help", "help":
                usage(stdout)
                return nil
        default:
                _, _ = fmt.Fprintf(stderr, "coverage: unknown subcommand %q\n\n", name)
                usage(stderr)
                return errUsage
        }
}

// errUsage signals that the caller passed an unrecognized or malformed
// invocation. main() translates it to exit code 2 (matching the Go
// `flag` package's convention) so wrappers can distinguish "you used
// it wrong" from a real failure.
//
// run returns it for a missing or unknown subcommand; runBadge and
// runGate return it when a required flag is absent. In each case the
// explanatory text has already been written to stderr, which is why
// main() does not print the error itself.
var errUsage = errors.New("usage error")

// usage writes the top-level help text (the list of subcommands and
// a pointer to per-subcommand help) to w. Write errors are
// deliberately ignored.
func usage(w io.Writer) {
        const helpText = `coverage - aggregate Go + C++ coverage for the self-hosted gh-pages pipeline.

Subcommands:
  summarize  Combine a Go coverage profile and/or an LCOV file into summary.json.
  badge      Emit a shields.io endpoint JSON for one field of summary.json.
  gate       Compare current summary to the baseline and fail on regression.

Run "coverage <subcommand> -h" for per-subcommand flags.
`
        _, _ = fmt.Fprint(w, helpText)
}

// flagSet returns a new FlagSet called name that reports parse
// errors and usage to stderr and uses flag.ContinueOnError, so Parse
// returns the error instead of exiting the process. Every subcommand
// builds its flags through this helper so they all behave the same
// way, and so tests are not polluted by the stdlib's default
// ExitOnError behaviour.
func flagSet(name string, stderr io.Writer) *flag.FlagSet {
        set := flag.NewFlagSet(name, flag.ContinueOnError)
        set.SetOutput(stderr)
        return set
}

// main runs the CLI against the process arguments and maps the
// result of run to an exit status:
//
//   - nil, or flag.ErrHelp (the user passed -h/--help to a
//     subcommand; the flag package has already printed the usage):
//     exit 0;
//   - errUsage: exit 2, with nothing further printed because the
//     explanation was already written to stderr;
//   - any other error: print it to stderr prefixed with "coverage: "
//     and exit 1.
func main() {
        err := run(os.Args[1:], os.Stdout, os.Stderr)
        switch {
        case err == nil:
                return
        case errors.Is(err, flag.ErrHelp):
                // Subcommand FlagSets use ContinueOnError, so a help request
                // surfaces here as an error. Asking for help is not a failure.
                return
        case errors.Is(err, errUsage):
                os.Exit(2)
        default:
                _, _ = fmt.Fprintf(os.Stderr, "coverage: %v\n", err)
                os.Exit(1)
        }
}

package main

import (
        "bufio"
        "fmt"
        "io"
        "os"
        "strconv"
        "strings"
)

// parseGoFile reads the Go coverage profile at path and returns
// (hitStatements, totalStatements). The profile format is documented
// at https://pkg.go.dev/golang.org/x/tools/cover and consists of:
//
//   - an optional first line `mode: (set|count|atomic)`
//   - one record per covered statement block:
//     `<file>:<startLine>.<startCol>,<endLine>.<endCol> <numStmts> <count>`
//
// totalStatements is the sum of numStmts over all records, and
// hitStatements the sum of numStmts over records whose count is
// positive. This mirrors the `total:` line of `go tool cover -func`
// without shelling out to it and without depending on
// `golang.org/x/tools/cover`. The actual parsing is done by
// parseGoReader; this wrapper only opens and closes the file.
func parseGoFile(path string) (hits, total int64, err error) {
        //nolint:gosec // CLI tool; reading caller-supplied paths is the point.
        profile, openErr := os.Open(path)
        if openErr != nil {
                return 0, 0, fmt.Errorf("open: %w", openErr)
        }
        defer profile.Close() //nolint:errcheck // read-only; close errors are not actionable
        return parseGoReader(profile)
}

// parseGoReader parses a Go coverage profile from r and returns
// (hitStatements, totalStatements). It exists separately from
// parseGoFile so tests can drive the parser without touching the
// filesystem. Blank lines and the `mode:` header are skipped; every
// other line must be a record accepted by parseGoLine, otherwise the
// error is returned prefixed with the 1-based line number.
func parseGoReader(r io.Reader) (hits, total int64, err error) {
        scanner := bufio.NewScanner(r)
        // bufio.Scanner stops with an error on a line longer than its
        // buffer cap (64 KiB by default). Profiles for very large
        // monorepos can contain such lines, so raise the cap to 1 MiB.
        scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)

        for lineNo := 1; scanner.Scan(); lineNo++ {
                record := strings.TrimSpace(scanner.Text())
                if record == "" || strings.HasPrefix(record, "mode:") {
                        continue
                }
                stmts, count, lineErr := parseGoLine(record)
                if lineErr != nil {
                        return 0, 0, fmt.Errorf("line %d: %w", lineNo, lineErr)
                }
                total += stmts
                if count > 0 {
                        hits += stmts
                }
        }
        if scanErr := scanner.Err(); scanErr != nil {
                return 0, 0, fmt.Errorf("scan: %w", scanErr)
        }
        return hits, total, nil
}

// parseGoLine reads the last two whitespace-separated fields of a
// coverage record and returns them as (numStatements, count). The
// line must have at least three fields. Everything before the final
// two, i.e. the `file:start.col,end.col` slug, is ignored because the
// per-file breakdown only matters for `go tool cover -html`, which
// the publishing workflow runs separately.
func parseGoLine(line string) (stmts, count int64, err error) {
        const requiredFields = 3
        fields := strings.Fields(line)
        n := len(fields)
        if n < requiredFields {
                return 0, 0, fmt.Errorf("unexpected field count %d in %q", n, line)
        }
        stmtsField, countField := fields[n-2], fields[n-1]
        if stmts, err = strconv.ParseInt(stmtsField, 10, 64); err != nil {
                return 0, 0, fmt.Errorf("parse stmts %q: %w", stmtsField, err)
        }
        if count, err = strconv.ParseInt(countField, 10, 64); err != nil {
                return 0, 0, fmt.Errorf("parse count %q: %w", countField, err)
        }
        return stmts, count, nil
}

package main

import (
        "bufio"
        "fmt"
        "io"
        "os"
        "strconv"
        "strings"
)

// parseLCOVFile reads the LCOV "tracefile" at path (the .dat that
// `bazel coverage --combined_report=lcov` deposits at
// bazel-out/_coverage/_coverage_report.dat) and returns the totals
// (hitLines, totalLines) computed by parseLCOVReader across every
// `SF:` record. The per-line `DA:<line>,<count>` records are counted
// directly rather than trusting the `LH:` / `LF:` summaries alone,
// because some toolchains emit `LH` without `LF` (or vice versa) when
// a file has no executable lines.
//
// The LCOV format is described in
// https://manpages.debian.org/testing/lcov/geninfo.1.en.html under
// "TRACEFILE FORMAT". The record types of interest here are:
//
//        SF:<absolute path>            // start of a source file record
//        DA:<line>,<count>             // one DA per executable line
//        end_of_record                 // end of a source file record
//
// This wrapper only opens and closes the file; all parsing is
// delegated to parseLCOVReader.
func parseLCOVFile(path string) (hits, total int64, err error) {
        //nolint:gosec // CLI tool; reading caller-supplied paths is the point.
        tracefile, openErr := os.Open(path)
        if openErr != nil {
                return 0, 0, fmt.Errorf("open: %w", openErr)
        }
        defer tracefile.Close() //nolint:errcheck // read-only; close errors are not actionable
        return parseLCOVReader(tracefile)
}

// parseLCOVReader is the reader-based entry point so tests can drive
// the parser without touching the filesystem.
//
// It tallies two independent counts while scanning:
//   - DA records: every `DA:` line adds one to the total, and one to
//     the hits when its execution count is positive.
//   - LH / LF summaries: the most recent `LH:` / `LF:` values of a
//     record are added to the running sums when `end_of_record` is
//     seen, then reset.
//
// The DA tally is returned whenever at least one DA record was seen;
// otherwise the summed LH / LF values are returned. A malformed DA,
// LH, or LF line aborts the parse with an error prefixed by its
// 1-based line number; a scanner failure is wrapped with "scan: ".
func parseLCOVReader(r io.Reader) (hits, total int64, err error) {
        sc := bufio.NewScanner(r)
        // Same generous line-length bump as the Go parser: combined LCOV
        // reports from large C++ trees can occasionally exceed 64 KiB on
        // pathological `BRDA:` lines, and we'd rather take the memory
        // than silently miscount.
        sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

        var (
                daHit, daSeen           int64 // tallied from DA records
                summaryHit, summarySeen int64 // summed LH / LF of closed records
                pendingLH, pendingLF    int64 // LH / LF of the record being read
        )
        atLine := func(n int, cause error) error {
                return fmt.Errorf("line %d: %w", n, cause)
        }

        for lineNo := 1; sc.Scan(); lineNo++ {
                text := strings.TrimSpace(sc.Text())
                switch {
                case strings.HasPrefix(text, "DA:"):
                        execCount, perr := parseDARecord(text)
                        if perr != nil {
                                return 0, 0, atLine(lineNo, perr)
                        }
                        daSeen++
                        if execCount > 0 {
                                daHit++
                        }
                case strings.HasPrefix(text, "LH:"):
                        n, perr := parseSummaryCount(text, "LH:")
                        if perr != nil {
                                return 0, 0, atLine(lineNo, perr)
                        }
                        pendingLH = n
                case strings.HasPrefix(text, "LF:"):
                        n, perr := parseSummaryCount(text, "LF:")
                        if perr != nil {
                                return 0, 0, atLine(lineNo, perr)
                        }
                        pendingLF = n
                case text == "end_of_record":
                        summaryHit += pendingLH
                        summarySeen += pendingLF
                        pendingLH, pendingLF = 0, 0
                }
        }
        if serr := sc.Err(); serr != nil {
                return 0, 0, fmt.Errorf("scan: %w", serr)
        }
        if daSeen == 0 {
                // No per-line data anywhere in the input: fall back to
                // the per-record summaries.
                return summaryHit, summarySeen, nil
        }
        return daHit, daSeen, nil
}

// parseSummaryCount extracts the integer payload of a summary record
// such as `LH:<n>` or `LF:<n>`. prefix is the record tag including
// its colon (e.g. "LH:"); it is stripped from line and the remainder
// is whitespace-trimmed and parsed as a base-10 int64. On failure the
// error names the tag (without the colon) and quotes the whole line.
func parseSummaryCount(line, prefix string) (int64, error) {
        digits := strings.TrimSpace(strings.TrimPrefix(line, prefix))
        n, perr := strconv.ParseInt(digits, 10, 64)
        if perr == nil {
                return n, nil
        }
        tag := strings.TrimSuffix(prefix, ":")
        return 0, fmt.Errorf("parse %s record %q: %w", tag, line, perr)
}

// parseDARecord returns the execution count carried by a
// `DA:<line>,<count>[,<checksum>]` record. Only the second
// comma-separated field is parsed; the line-number field is not
// validated, and anything after a second comma (the optional MD5
// checksum geninfo emits with `--checksum`) is ignored.
//
// A record without any comma is reported as malformed; a count that
// is not a base-10 int64 yields a wrapped strconv error.
func parseDARecord(line string) (count int64, err error) {
        payload := strings.TrimPrefix(line, "DA:")
        _, afterLine, hasCount := strings.Cut(payload, ",")
        if !hasCount {
                return 0, fmt.Errorf("malformed DA record %q", line)
        }
        // Drop the optional checksum field, if present.
        countField, _, _ := strings.Cut(afterLine, ",")
        parsed, perr := strconv.ParseInt(countField, 10, 64)
        if perr != nil {
                return 0, fmt.Errorf("parse count %q in %q: %w", countField, line, perr)
        }
        return parsed, nil
}

package main

import (
        "encoding/json"
        "fmt"
        "io"
        "os"
        "time"
)

// Summary is the JSON shape that lands at gh-pages:baseline.json after
// every main-branch build and at workflow_run artifact downloads for
// PR gating. Per-flag fields use -1 when no input was supplied for
// that flag so downstream consumers (badge, gate) can distinguish
// "missing" from "really zero coverage".
//
// runSummarize fills it in, emitSummary serialises it as indented
// JSON, and readSummary decodes it again.
type Summary struct {
        // Total is the pooled percentage across every flag the summarize
        // command consumed: hits and totals are summed across the inputs
        // before dividing, so it is not an average of the per-flag
        // percentages. When only one flag is supplied it matches that
        // flag's percentage exactly.
        Total float64 `json:"total"`
        // Go is the percentage from the Go `coverage.out` profile, or
        // missingFlag (-1) when no Go profile was supplied.
        Go float64 `json:"go"`
        // CPP is the percentage from the Bazel/LCOV combined report, or
        // missingFlag (-1) when no LCOV report was supplied.
        CPP float64 `json:"cpp"`
        // Commit is the git SHA the summary describes, if known. Omitted
        // from the JSON when empty.
        Commit string `json:"commit,omitempty"`
        // Timestamp is RFC3339 UTC. Omitted from the JSON when empty.
        Timestamp string `json:"timestamp,omitempty"`
}

// missingFlag is the sentinel the consumers use to detect "no input
// for this flag was supplied". -1 is impossible for a real percentage,
// so it round-trips through JSON unambiguously. runSummarize seeds
// Summary.Go and Summary.CPP with it and only overwrites the field
// whose input path was actually provided.
const missingFlag = -1.0

// percentage converts a hits/total pair into a 0..100 figure rounded
// to one decimal place (via roundTenths), which keeps the JSON file
// stable across runs that drift in the noise-floor. A total that is
// zero or negative yields 0 rather than dividing by zero.
func percentage(hits, total int64) float64 {
        if total > 0 {
                scaled := float64(hits) / float64(total) * 100
                return roundTenths(scaled)
        }
        return 0
}

// roundTenths rounds v to one decimal place so summary.json does not
// churn on minor floating-point noise. It scales by ten, adds one
// half, and truncates through int64 instead of calling `math.Round`,
// so the behaviour is obvious from the source. The truncation is
// toward zero, so the rounding is only meaningful for the
// non-negative values percentage passes in.
func roundTenths(v float64) float64 {
        tenths := int64(v*10 + 0.5)
        return float64(tenths) / 10
}

// runSummarize implements `coverage summarize`. It parses the flags
// in args, reads whichever of the Go profile (--go) and the LCOV
// report (--lcov) were supplied, and emits a Summary to --out (or
// stdout when --out is empty).
//
// Either input may be omitted (e.g. when the lcov producer failed in
// CI), but not both: with neither flag set a message and the usage
// text go to stderr and errUsage is returned. An omitted input leaves
// its per-flag field at `missingFlag`, and the total is computed from
// the pooled hits / totals of the inputs that were provided.
//
//nolint:cyclop // straight-line option handling; splitting hurts readability.
func runSummarize(args []string, stdout, stderr io.Writer) error {
        fs := flagSet("summarize", stderr)
        var (
                goPath   = fs.String("go", "", "path to Go coverage profile (go test -coverprofile)")
                lcovPath = fs.String("lcov", "", "path to LCOV combined report (bazel coverage --combined_report=lcov)")
                outPath  = fs.String("out", "", "output JSON path (default: stdout)")
                commit   = fs.String("commit", "", "git commit SHA to record in summary.json (optional)")
        )
        if err := fs.Parse(args); err != nil {
                return err
        }

        haveGo, haveLCOV := *goPath != "", *lcovPath != ""
        if !haveGo && !haveLCOV {
                _, _ = fmt.Fprintln(stderr, "summarize: at least one of --go or --lcov is required")
                fs.Usage()
                return errUsage
        }

        summary := Summary{
                Go:        missingFlag,
                CPP:       missingFlag,
                Commit:    *commit,
                Timestamp: time.Now().UTC().Format(time.RFC3339),
        }

        // Pooled counters feeding Summary.Total.
        var hitSum, lineSum int64

        if haveGo {
                hits, lines, err := parseGoFile(*goPath)
                if err != nil {
                        return fmt.Errorf("parse go profile %q: %w", *goPath, err)
                }
                summary.Go = percentage(hits, lines)
                hitSum += hits
                lineSum += lines
        }

        if haveLCOV {
                hits, lines, err := parseLCOVFile(*lcovPath)
                if err != nil {
                        return fmt.Errorf("parse lcov file %q: %w", *lcovPath, err)
                }
                summary.CPP = percentage(hits, lines)
                hitSum += hits
                lineSum += lines
        }

        summary.Total = percentage(hitSum, lineSum)
        return emitSummary(&summary, *outPath, stdout)
}

// emitSummary serialises s as two-space-indented JSON followed by a
// trailing newline (which keeps the output friendly to `cat`). Field
// order is deterministic because it follows the declaration order of
// Summary's fields.
//
// With an empty outPath the bytes are written to stdout and the raw
// write error, if any, is returned. Otherwise they are written to the
// file at outPath with mode 0o644, and a failure is wrapped with the
// quoted path.
func emitSummary(s *Summary, outPath string, stdout io.Writer) error {
        encoded, err := json.MarshalIndent(s, "", "  ")
        if err != nil {
                return fmt.Errorf("marshal summary: %w", err)
        }
        encoded = append(encoded, '\n')

        switch outPath {
        case "":
                _, werr := stdout.Write(encoded)
                return werr
        default:
                //nolint:gosec // 0o644 is the right mode for a CI-published JSON artifact.
                if werr := os.WriteFile(outPath, encoded, 0o644); werr != nil {
                        return fmt.Errorf("write %q: %w", outPath, werr)
                }
                return nil
        }
}

// readSummary loads and decodes a summary file previously written by
// runSummarize. Used by both the badge and gate subcommands.
//
// Read failures are wrapped as `read "<path>"` and JSON failures as
// `decode "<path>"`; in both cases the returned pointer is nil.
func readSummary(path string) (*Summary, error) {
        //nolint:gosec // CLI tool; reading caller-supplied paths is the point.
        raw, err := os.ReadFile(path)
        if err != nil {
                return nil, fmt.Errorf("read %q: %w", path, err)
        }
        decoded := new(Summary)
        if err := json.Unmarshal(raw, decoded); err != nil {
                return nil, fmt.Errorf("decode %q: %w", path, err)
        }
        return decoded, nil
}

package main

import (
        "bytes"
        "fmt"
        "io"
        "regexp"
        "sort"
        "strings"
)

// Finding is a single source-only rule violation. Every check in
// this file reports its results as Findings; the runner prints each
// one via Format in the `path:line:col: rule: message` shape
// (compatible with editor jump-to-line) before exiting.
type Finding struct {
        // Rule is the stable identifier callers grep for, e.g.
        // `file-length`, `banned-logging`, `status-discarded`.
        Rule string
        // Path is repo-relative, matching the `cpp-lint list` output.
        Path string
        // Line is 1-based; 0 means "applies to the whole file".
        Line int
        // Col is 1-based, 0 when the rule is line-level.
        Col int
        // Message is the human-readable explanation. Should fit on one
        // line; longer guidance belongs in the docs the message links
        // to.
        Message string
}

// Format renders a Finding using the most specific location it has:
//
//        path:line:col: rule: msg   // Line > 0 and Col > 0
//        path:line: rule: msg       // Line > 0 only
//        path: rule: msg            // whole-file finding (Line <= 0)
//
// Col is ignored when Line is not positive. Editors jump to the right
// location when the output is piped through the standard error
// filter.
func (f Finding) Format() string {
        if f.Line <= 0 {
                return fmt.Sprintf("%s: %s: %s", f.Path, f.Rule, f.Message)
        }
        if f.Col <= 0 {
                return fmt.Sprintf("%s:%d: %s: %s", f.Path, f.Line, f.Rule, f.Message)
        }
        return fmt.Sprintf("%s:%d:%d: %s: %s", f.Path, f.Line, f.Col, f.Rule, f.Message)
}

// CheckOptions bundles the knobs every per-file check needs to
// know about. Centralised so the runner threads them through one
// argument instead of growing an ever-longer parameter list as new
// rules land.
type CheckOptions struct {
        // MaxFileLines is the whole-file line count above which a file
        // is rejected. 500 today (see plan thresholds); runCheck fills
        // it from the `--max-lines` flag. A value of zero or less
        // disables the file-length check (see checkFileLength).
        MaxFileLines int
}

// runOnce applies every source-only check (file length, banned
// logging, status anti-patterns) to a single file and returns the
// surviving findings. Findings suppressed via inline
// `// cpp-lint:allow(rule) ...` comments are dropped first; the
// remainder is then stably sorted by (line, rule) so the output is
// deterministic regardless of the order in which the check helpers
// produced it.
func runOnce(path string, body []byte, opts CheckOptions) []Finding {
        lines := splitLines(body)

        findings := checkFileLength(path, body, opts)
        findings = append(findings, checkBannedLogging(path, lines)...)
        findings = append(findings, checkStatusAntiPatterns(path, lines)...)
        findings = filterSuppressed(findings, collectSuppressions(lines))

        sort.SliceStable(findings, func(a, b int) bool {
                left, right := findings[a], findings[b]
                if left.Line == right.Line {
                        return left.Rule < right.Rule
                }
                return left.Line < right.Line
        })
        return findings
}

// splitLines slices `body` into lines so that 1-based line N lives
// at lines[N-1]. A single trailing '\n' is dropped before splitting,
// so a newline-terminated file does not gain an empty last element.
// Empty input yields nil.
//
// The split is on '\n' only: in a CRLF file each element keeps its
// trailing '\r' (rule helpers strip it via the shared normaliser
// when needed).
func splitLines(body []byte) []string {
        if len(body) == 0 {
                return nil
        }
        text := string(body)
        if strings.HasSuffix(text, "\n") {
                text = text[:len(text)-1]
        }
        return strings.Split(text, "\n")
}

// suppressMarkerRE recognises a suppression comment of the form
// `// cpp-lint:allow(rule[, rule]) -- reason text`. The marker may
// sit at the end of the offending line OR on a comment-only line
// preceding it. A marker always suppresses the listed rules on its
// own line; a marker on a comment-only line additionally suppresses
// them on the next code line (see collectSuppressions). We support
// both styles because clang-format may wrap a long trailing comment,
// but a marker on its own line stays on its own line.
//
// The `//` must be at the start of the line or preceded by
// whitespace, and the marker must run to the end of the line.
// Capture group 1 is the raw comma-separated rule list; capture
// group 2 is the reason text.
//
// The trailing reason is mandatory: a `nolint`-style suppression
// that does not say WHY is harder to audit later. A marker that
// omits the `-- reason` body silently fails to suppress.
var suppressMarkerRE = regexp.MustCompile(`(?:^|\s)//\s*cpp-lint:allow\(([^)]*)\)\s*--\s*(.+?)\s*$`)

// suppression records the rule set that an inline marker disables
// on a particular source line. It is the value type of the
// line -> rules map built by collectSuppressions and consulted by
// filterSuppressed.
type suppression struct {
        // Rules is the set of suppressed rule identifiers (the keys
        // are compared against Finding.Rule).
        Rules map[string]struct{}
}

// collectSuppressions scans every line for a suppression marker and
// returns a map from 1-based line number to the rules suppressed
// there.
//
// A marker on line N always applies to line N. When the marker sits
// on a comment-only line it also applies to the next code line (the
// first subsequent line that is not blank and not a pure comment).
// The latter handles clang-format wrapping a long trailing comment
// over two lines: the marker then lives on a comment-only line whose
// target is the first code line that follows. Rules from several
// markers landing on the same line are merged.
func collectSuppressions(lines []string) map[int]suppression {
        byLine := make(map[int]suppression)
        for idx, text := range lines {
                match := suppressMarkerRE.FindStringSubmatch(text)
                if match == nil {
                        continue
                }
                markerLine := idx + 1
                rules := parseSuppressionRules(match[1])
                mergeSuppression(byLine, markerLine, rules)

                if !isCommentOnlyLine(text) {
                        continue
                }
                if target := nextCodeLine(lines, markerLine); target > 0 {
                        mergeSuppression(byLine, target, rules)
                }
        }
        return byLine
}

func parseSuppressionRules(spec string) map[string]struct{} {
        rules := map[string]struct{}{}
        for r := range strings.SplitSeq(spec, ",") {
                r = strings.TrimSpace(r)
                if r == "" {
                        continue
                }
                rules[r] = struct{}{}
        }
        return rules
}

// mergeSuppression adds every rule in rules to the suppression entry
// for line in dst, creating the entry when the line has none yet.
// Existing rules on that line are kept, so repeated calls accumulate.
func mergeSuppression(dst map[int]suppression, line int, rules map[string]struct{}) {
        entry, seen := dst[line]
        if !seen {
                entry.Rules = make(map[string]struct{})
        }
        for name := range rules {
                entry.Rules[name] = struct{}{}
        }
        dst[line] = entry
}

// isCommentOnlyLine reports whether the line, after trimming
// surrounding whitespace, begins with `//`. Lines that start with a
// block comment (`/* ... */`) are deliberately treated as code so a
// marker buried inside a multi-line block comment does not
// accidentally apply to the next statement.
func isCommentOnlyLine(line string) bool {
        return strings.HasPrefix(strings.TrimSpace(line), "//")
}

// nextCodeLine returns the 1-based line number of the first line
// strictly after 1-based line `start` that is neither blank nor a
// pure `//` comment. (`start` is used directly as the 0-based index
// at which scanning begins, which is the line following 1-based
// `start`.) Returns 0 when no such line exists in the file.
func nextCodeLine(lines []string, start int) int {
        for idx := start; idx < len(lines); idx++ {
                text := lines[idx]
                skippable := strings.TrimSpace(text) == "" || isCommentOnlyLine(text)
                if !skippable {
                        return idx + 1
                }
        }
        return 0
}

// filterSuppressed drops every finding whose (Line, Rule) pair is
// listed in sup and returns the rest in their original order. With
// no suppressions the input slice is returned untouched; otherwise
// the filtering is done in place, reusing in's backing array, so the
// caller must use the returned slice and not `in` afterwards.
func filterSuppressed(in []Finding, sup map[int]suppression) []Finding {
        if len(sup) == 0 {
                return in
        }
        kept := in[:0]
        for i := range in {
                f := in[i]
                // A line without an entry yields the zero suppression,
                // whose nil Rules map reports every rule as absent.
                if _, dropped := sup[f.Line].Rules[f.Rule]; dropped {
                        continue
                }
                kept = append(kept, f)
        }
        return kept
}

// --- file-length --------------------------------------------------------

const (
        // ruleFileLength is the Finding.Rule identifier reported by
        // checkFileLength.
        ruleFileLength = "file-length"
)

// checkFileLength enforces the whole-file line cap and returns at
// most one file-level finding (Line and Col left at zero) when the
// file has more than opts.MaxFileLines lines as counted by
// countLines. A cap of zero or less disables the check.
//
// The plan chooses 500 lines for first-party `.cc`/`.h` to mirror
// the Go `revive` `file-length-limit` rule already enabled in
// `.golangci.yml`.
func checkFileLength(path string, body []byte, opts CheckOptions) []Finding {
        limit := opts.MaxFileLines
        if limit <= 0 {
                return nil
        }
        n := countLines(body)
        if n <= limit {
                return nil
        }
        msg := fmt.Sprintf("file has %d lines (max %d); split the file", n, limit)
        return []Finding{{Rule: ruleFileLength, Path: path, Message: msg}}
}

// countLines returns the number of newline-terminated lines in the
// file plus one for any trailing partial line. For files that end in
// `\n` this equals what `wc -l` prints; for a file whose last line
// lacks the terminating `\n` the result is one higher than `wc -l`,
// because that final partial line is counted too. Empty input has
// zero lines.
func countLines(body []byte) int {
        n := bytes.Count(body, []byte{'\n'})
        if len(body) > 0 && !bytes.HasSuffix(body, []byte{'\n'}) {
                n++
        }
        return n
}

// --- banned-logging ----------------------------------------------------

// ruleBannedLogging is the Finding.Rule identifier reported by
// checkBannedLogging.
const ruleBannedLogging = "banned-logging"

// bannedLoggingPatterns is the (rule-message-friendly) list of
// production logging APIs we explicitly do not allow inside
// production C++. Tests, fixture printers, and the smoke binaries
// under `tools/googlesql-prebuilt/smoke/` may use these; the
// runner skips the rule for those paths via isLoggingAllowed().
//
// Each entry pairs a literal needle with the message reported when
// it is found. checkBannedLogging looks each needle up with a plain
// substring search on the comment- and string-stripped line and
// reports only the first occurrence per line; no identifier-boundary
// check is applied to these needles, so e.g. `std::cout_like` would
// also match. Every needle carries the `std::` qualifier, so a bare
// identifier such as `kStdCoutName` cannot match. The patterns are
// kept simple — a regex with capture groups would be overkill for a
// list this small.
var bannedLoggingPatterns = []struct {
        needle  string
        message string
}{
        {
                "std::cout",
                "std::cout is banned in production C++; use absl::Status / structured logging via the gRPC error envelope",
        },
        {"std::cerr", "std::cerr is banned in production C++; surface errors through absl::Status / grpc::Status instead"},
        {"std::clog", "std::clog is banned in production C++; route diagnostics through absl::Status / grpc::Status"},
        {
                "std::printf",
                "std::printf is banned in production C++; use absl::StrCat / absl::StrFormat and return errors via Status",
        },
        {
                "std::fprintf",
                "std::fprintf is banned in production C++; surface diagnostics through absl::Status / grpc::Status",
        },
}

// printfWordRE matches `printf(` or `fprintf(` (optionally with
// whitespace before the parenthesis) starting at a word boundary.
// This catches the common `<cstdio>` / `<stdio.h>` variants. On its
// own the pattern also matches qualified or member calls such as
// `std::printf(` and `obj.printf(`, because Go's regexp package has
// no look-behind; checkBannedLogging therefore inspects the byte
// immediately preceding each match and skips those cases itself.
var printfWordRE = regexp.MustCompile(`\b(f?printf)\s*\(`)

// checkBannedLogging reports uses of banned logging APIs in a
// production C++ file. Files for which isLoggingAllowed returns true
// produce no findings.
//
// Each line is first passed through stripCommentsAndStrings so that
// comments and string literals cannot trigger a finding while columns
// stay aligned with the on-disk text. Per line, findings are emitted
// in this order:
//  1. one finding per bannedLoggingPatterns needle present on the
//     line (first occurrence only), in list order;
//  2. one finding per unqualified `printf(` / `fprintf(` call.
func checkBannedLogging(path string, lines []string) []Finding {
        if isLoggingAllowed(path) {
                return nil
        }
        var findings []Finding
        report := func(lineNo, col int, msg string) {
                findings = append(findings, Finding{
                        Rule:    ruleBannedLogging,
                        Path:    path,
                        Line:    lineNo,
                        Col:     col,
                        Message: msg,
                })
        }

        for idx := range lines {
                code := stripCommentsAndStrings(lines[idx])
                if code == "" {
                        continue
                }
                lineNo := idx + 1

                for _, banned := range bannedLoggingPatterns {
                        if at := strings.Index(code, banned.needle); at >= 0 {
                                report(lineNo, at+1, banned.message)
                        }
                }

                // printf / fprintf without an `std::` prefix. Skip the
                // match when it is preceded by `::` (already covered by
                // the std::printf needle), `.` / `->` (member call), or
                // an identifier character (avoids false positives on
                // `kSomePrefixprintf`-style symbols).
                for _, span := range printfWordRE.FindAllStringIndex(code, -1) {
                        at := span[0]
                        if at > 0 {
                                switch before := code[at-1]; {
                                case isIdentChar(before), before == '.', before == '>', before == ':':
                                        continue
                                }
                        }
                        report(lineNo, at+1, "printf / fprintf is banned in production C++; surface errors through absl::Status")
                }
        }
        return findings
}

// isLoggingAllowed reports whether the banned-logging rule is waived
// for path. That is the case when IsTestFile accepts the path, when
// the path lies under `tools/googlesql-prebuilt/smoke/`, or when it
// equals SentinelEmulatorMain (per the original note this is
// `binaries/emulator_main/main.cc`, which legitimately writes the
// `--help` / `--version` output to stdout/stderr at process start —
// the constant's value is defined elsewhere; confirm there).
//
// The exemptions are deliberately narrow: the `main.cc` carve-out
// stays an exact path match so a future `binaries/foo/main.cc` has
// to opt in explicitly, and the smoke directory matches by prefix
// because every file under it is non-production.
func isLoggingAllowed(path string) bool {
        switch {
        case IsTestFile(path):
                return true
        case strings.HasPrefix(path, "tools/googlesql-prebuilt/smoke/"):
                return true
        default:
                return path == SentinelEmulatorMain
        }
}

// stripCommentsAndStrings returns `line` with `//` comments and
// double-quoted strings replaced by spaces of the same length.
// This is a deliberately small lexer — it does not follow `/*
// */` block comments across newlines or recognise raw string
// literals — but it is sufficient to keep `// std::cerr is bad`
// comments and `"std::cout"` literals from raising findings while
// still catching real violations on any normal source line.
//
// Simple character literals (`'x'`, `'"'`, `'\"'`, `'\\'`) are
// recognised and copied through unchanged. Without this a `"`
// inside a character literal would be mistaken for the start of a
// string, blanking the real code that follows it on the line.
//
// We pad with spaces (rather than truncate) so column numbers in
// findings still match the on-disk file: the result always has the
// same byte length as the input. Block comments and raw strings are
// flagged as a known-limitation in the package README; a real lexer
// would be overkill for a check that already runs in well under a
// second.
func stripCommentsAndStrings(line string) string {
        var out strings.Builder
        out.Grow(len(line))
        inString := false
        escape := false
        for i := 0; i < len(line); i++ {
                c := line[i]
                if inString {
                        out.WriteByte(' ')
                        if escape {
                                // The byte after a backslash never ends the string.
                                escape = false
                                continue
                        }
                        switch c {
                        case '\\':
                                escape = true
                        case '"':
                                inString = false
                        }
                        continue
                }
                if c == '\'' {
                        // Copy a whole character literal verbatim so a quote
                        // or slash inside it is not interpreted below.
                        if n := charLiteralLen(line[i:]); n > 0 {
                                out.WriteString(line[i : i+n])
                                i += n - 1
                                continue
                        }
                }
                if c == '"' {
                        inString = true
                        out.WriteByte(' ')
                        continue
                }
                if c == '/' && i+1 < len(line) && line[i+1] == '/' {
                        // Pad the rest of the line so column numbers stay
                        // aligned with the original buffer.
                        for ; i < len(line); i++ {
                                out.WriteByte(' ')
                        }
                        break
                }
                out.WriteByte(c)
        }
        return out.String()
}

// charLiteralLen returns the byte length of the simple C++ character
// literal at the start of s (which must begin with `'`): 3 for the
// `'x'` form, 4 for the single-character escape form `'\x'`. It
// returns 0 when s does not start with either form, which includes
// C++14 digit separators such as the apostrophes in `1'000'000`.
func charLiteralLen(s string) int {
        switch {
        case len(s) >= 4 && s[1] == '\\' && s[3] == '\'':
                return 4
        case len(s) >= 3 && s[1] != '\\' && s[1] != '\'' && s[2] == '\'':
                return 3
        }
        return 0
}

// isIdentChar reports whether b is an ASCII byte that can appear in
// a C/C++ identifier: a letter, a digit, or an underscore.
func isIdentChar(b byte) bool {
        switch {
        case b == '_':
                return true
        case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9':
                return true
        }
        return false
}

// --- status-anti-patterns ---------------------------------------------

const (
        // ruleStatusDiscarded is the Finding.Rule identifier emitted by
        // scanDiscardedStatus.
        ruleStatusDiscarded   = "status-discarded"
        // ruleStatusOrUnchecked is the Finding.Rule identifier emitted
        // by scanStatusOrValue.
        ruleStatusOrUnchecked = "statusor-unchecked-value"
)

// statusCallStmtRE matches a line that BEGINS (after optional
// indentation) with a call expression: zero or more `ns::`
// qualifiers, an optional single `receiver.` prefix, then a function
// name immediately followed by `(`. Capture group 3 is the bare
// function name, which scanDiscardedStatus looks up in
// statusDiscardedFunctions.
//
// Because the pattern is anchored at the start of the line, flows
// such as `absl::Status s = f(); if (!s.ok()) ...` or
// `return f();` are not matched: the call does not open the
// statement. Calls through `->` or through a chained receiver
// (`a.b.f(`) are not matched either.
//
// We avoid trying to be a parser — clang-tidy's
// `bugprone-unused-return-value` is the long-term mechanism for
// this — but the regex catches the obvious case where someone
// types `engine.ExecuteDdl(...)` and forgets to inspect the
// result.
var statusCallStmtRE = regexp.MustCompile(
        `^\s*([A-Za-z_][A-Za-z_0-9]*::)*([A-Za-z_][A-Za-z_0-9]*\s*\.\s*)?([A-Za-z_][A-Za-z_0-9]*)\(`,
)

// statusOrValueRE matches a literal `.value()` invocation. The
// pattern knows nothing about types or guards: scanStatusOrValue is
// what decides whether a match is reported, by additionally looking
// for an `.ok()` check on the same line or a guard on nearby
// preceding lines. The check runs per-line so it is necessarily
// approximate; the value of catching even the obvious cases
// outweighs the false-positive risk because the suggested
// replacement (`*r` after a `.ok()` check, or
// `r.value_or(default)` with an explicit fallback) is almost always
// cleaner.
var statusOrValueRE = regexp.MustCompile(`\.value\(\)`)

// statusOrOkRE detects an inline `.ok()` check on the same source
// line as the `.value()` call. When present, we skip the
// `.value()` finding because the code is already guarding the
// dereference. The match is purely textual: any `.ok()` on the
// line counts, whichever object it is called on.
var statusOrOkRE = regexp.MustCompile(`\.ok\(\)`)

// checkStatusAntiPatterns surfaces two concrete failure modes:
//   - `RunSql(...);` (a discarded `absl::Status`), via
//     scanDiscardedStatus.
//   - `result.value()` with no `.ok()` / status guard within a
//     short window of the call (a `StatusOr` access that crashes
//     on an absent value), via scanStatusOrValue.
//
// Test files (per IsTestFile) are exempt. Every other line is
// stripped of comments and string literals before being scanned, and
// findings are returned in line order.
//
// Both rules are deliberately conservative — clang-tidy's
// `bugprone-unused-return-value` and
// `bugprone-unchecked-optional-access` are the long-term
// mechanism. The checks here exist so `task lint:run` can still
// catch the most common review nits without spinning up the full
// compile-aware lane.
func checkStatusAntiPatterns(path string, lines []string) []Finding {
        if IsTestFile(path) {
                return nil
        }
        var findings []Finding
        for idx := range lines {
                code := stripCommentsAndStrings(lines[idx])
                if code == "" {
                        continue
                }
                lineNo := idx + 1
                findings = append(findings, scanDiscardedStatus(path, lineNo, code)...)
                findings = append(findings, scanStatusOrValue(path, lineNo, code, lines)...)
        }
        return findings
}

// statusDiscardedFunctions lists functions whose return value
// must always be inspected. The list is small on purpose: this
// helper exists for repo-specific rules that clang-tidy's
// generic `[[nodiscard]]` plumbing cannot model without seeing
// the headers. Once `[[nodiscard]]` annotations land on the
// matching declarations, clang-tidy will surface the same
// findings during `task lint:cpp:tidy`.
//
// Keys are bare function names (no receiver or namespace);
// scanDiscardedStatus looks up the name captured by
// statusCallStmtRE in this set.
var statusDiscardedFunctions = map[string]struct{}{
        // Engine.ExecuteDdl returns absl::Status; ignoring it loses
        // the failure that the gateway needs to surface as a 4xx /
        // 5xx response.
        "ExecuteDdl": {},
        // Storage.AppendRows returns absl::Status; ignoring it
        // silently drops a streaming insert.
        "AppendRows": {},
        // Storage.OverwriteRows returns absl::Status.
        "OverwriteRows": {},
        // Storage.DropTable returns absl::Status.
        "DropTable": {},
}

// scanDiscardedStatus reports a statement-form call to one of the
// functions in statusDiscardedFunctions, i.e. a call whose
// absl::Status result is thrown away. `stripped` is the line with
// comments and strings already blanked and `line` its 1-based
// number. A finding is produced only when the line ends in `;`,
// starts with a call per statusCallStmtRE, and the called function
// is in the watch list. The finding's column is always 1.
func scanDiscardedStatus(path string, line int, stripped string) []Finding {
        // Cheap pre-filter: a discarded-status statement always ends in `;`.
        if trimmed := strings.TrimSpace(stripped); !strings.HasSuffix(trimmed, ";") {
                return nil
        }
        match := statusCallStmtRE.FindStringSubmatch(stripped)
        if match == nil {
                return nil
        }
        callee := match[3]
        if _, watched := statusDiscardedFunctions[callee]; watched {
                return []Finding{{
                        Rule:    ruleStatusDiscarded,
                        Path:    path,
                        Line:    line,
                        Col:     1,
                        Message: fmt.Sprintf("discarded absl::Status return from %q; capture and inspect via if (!s.ok()) { ... }", callee),
                }}
        }
        return nil
}

// statusOrLookbackLines is the number of immediately preceding
// source lines (blank and comment-only lines included) that
// hasNearbyStatusGuard inspects when looking for a status guard
// before a `.value()` call.
// Five lines is enough to cover the canonical pattern:
//
//        absl::StatusOr<T> rendered = ...;
//        if (!rendered.ok()) return rendered.status();
//        *out = std::move(rendered).value();
//
// while still catching the obvious "fetch -> dereference" calls
// that lack any guard.
const statusOrLookbackLines = 5

// scanStatusOrValue reports a `.value()` call that has no visible
// status guard. `stripped` is the current line with comments and
// strings blanked, `line` its 1-based number, and `lines` the whole
// file (used for the look-back). No finding is produced when the
// line has no `.value()`, when the same line also contains `.ok()`,
// or when hasNearbyStatusGuard finds a guard on a preceding line.
// Otherwise a single finding points at the first `.value()` on the
// line.
func scanStatusOrValue(path string, line int, stripped string, lines []string) []Finding {
        loc := statusOrValueRE.FindStringIndex(stripped)
        if loc == nil {
                return nil
        }
        if statusOrOkRE.MatchString(stripped) || hasNearbyStatusGuard(lines, line) {
                return nil
        }
        finding := Finding{
                Rule:    ruleStatusOrUnchecked,
                Path:    path,
                Line:    line,
                Col:     loc[0] + 1,
                Message: "StatusOr<T>::value() without a nearby .ok() guard; check status before unwrapping (or annotate with `// cpp-lint:allow(statusor-unchecked-value) -- reason` if intentional)",
        }
        return []Finding{finding}
}

// statusGuardRE matches the patterns we treat as a `.value()`
// safety net:
//
//   - `if (!x.ok())` — the canonical guard (matched as `!` followed
//     by an identifier and `.ok()`).
//   - `return x.status()` — the early-return inside a guard
//     (matched as any `.status()` call, with or without `return`).
//   - `RETURN_IF_ERROR(x)` — the absl macro pattern.
//   - `ASSIGN_OR_RETURN(...)` — likewise.
//
// We intentionally over-accept here. False negatives (a missed
// finding because of a permissive guard pattern) are acceptable;
// false positives (a screaming finding on already-safe code) are
// not, because they erode trust in the rule.
var statusGuardRE = regexp.MustCompile(
        `!\s*[A-Za-z_][A-Za-z_0-9]*\s*\.\s*ok\(\)|RETURN_IF_ERROR\s*\(|ASSIGN_OR_RETURN\s*\(|\.\s*status\(\)`,
)

// hasNearbyStatusGuard reports whether any of the lines directly above
// the 1-based `line` matches statusGuardRE. At most
// statusOrLookbackLines lines are inspected, nearest first, and the
// window never reaches above the first line of the file. Each candidate
// is passed through stripCommentsAndStrings before matching.
func hasNearbyStatusGuard(lines []string, line int) bool {
        oldest := max(line-statusOrLookbackLines, 1)
        for cur := line - 1; cur >= oldest; cur-- {
                // lines is 0-based while cur is a 1-based line number.
                candidate := stripCommentsAndStrings(lines[cur-1])
                if statusGuardRE.MatchString(candidate) {
                        return true
                }
        }
        return false
}

// runCheck implements the `cpp-lint check` subcommand.
//
// It obtains the first-party source list from readSources, reads each
// file, runs every per-file rule via runOnce, and prints one formatted
// finding per line on stdout. When at least one finding was printed it
// writes a count to stderr and returns errFindings. A flag-parse
// failure or any positional argument yields errUsage; a failure to
// list or read sources is returned as-is (read errors are wrapped with
// the relative path).
//
// Flags:
//   - `--max-lines` overrides the file-length cap (default 500).
func runCheck(args []string, stdout, stderr io.Writer) error {
        flags := flagSet("check", stderr)
        lineCap := flags.Int("max-lines", 500, "fail when a first-party C++ file exceeds this many lines")
        if parseErr := flags.Parse(args); parseErr != nil {
                return errUsage
        }
        if flags.NArg() != 0 {
                _, _ = fmt.Fprintln(stderr, "cpp-lint check: takes no positional arguments")
                return errUsage
        }

        sources, root, err := readSources()
        if err != nil {
                return err
        }

        reported := 0
        opts := CheckOptions{MaxFileLines: *lineCap}
        for _, rel := range sources {
                body, readErr := readFile(resolveAgainstRoot(root, rel))
                if readErr != nil {
                        return fmt.Errorf("read %s: %w", rel, readErr)
                }
                found := runOnce(rel, body, opts)
                for _, f := range found {
                        _, _ = fmt.Fprintln(stdout, f.Format())
                }
                reported += len(found)
        }
        if reported == 0 {
                return nil
        }
        _, _ = fmt.Fprintf(stderr, "cpp-lint: %d finding(s)\n", reported)
        return errFindings
}

// Command cpp-lint is the BigQuery emulator's first-party C++
// source-only lint runner.
//
// It exists to enforce three classes of rule that clang-format and
// clang-tidy do not cover well in this repo:
//
//  1. The list of files we own (vs. vendored / generated / cached
//     trees) is computed in exactly one place. Every C++ lint task
//     consumes it via `cpp-lint list`, so a path that is not first
//     party can never sneak into clang-format, clang-tidy, or
//     cppcheck.
//  2. A whole-file line-count rule (default 500 lines) that
//     clang-tidy's `readability-function-size` cannot express.
//  3. Repo-specific anti-patterns: banned production logging APIs
//     (`std::cout` / `std::cerr` / `printf` outside tests and
//     tools), and obvious `absl::Status` / `absl::StatusOr<T>`
//     misuse such as a discarded `Status` return or `.value()` on a
//     `StatusOr` without a status check first.
//
// The binary deliberately depends only on the standard library so a
// fresh checkout can run `go run ./tools/lint/cpp` (or the
// `task lint:cpp:source` wrapper) without bootstrapping anything
// extra. Subcommands and flags follow the same shape as
// `tools/coverage` so contributors recognise the layout.
package main

import (
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
)

// Subcommand names. Centralised so the dispatch in run() and the
// table-driven tests stay aligned.
//
// run() routes cmdList to runList, cmdCheck to runCheck, and
// cmdParseTidyLog to runParseTidyLog; the latter also uses its
// constant as the name of its FlagSet.
const (
        cmdList         = "list"
        cmdCheck        = "check"
        cmdParseTidyLog = "parse-tidy-log"
)

// run dispatches a cpp-lint invocation and is the entry point tests
// drive directly: it reports failure through its return value and
// never calls os.Exit.
//
// args is the command line without the program name. The first element
// picks the subcommand and the rest is handed to it untouched. An empty
// args or an unknown subcommand prints the usage text to stderr and
// returns errUsage; `-h`, `--help` and `help` print it to stdout and
// return nil.
func run(args []string, stdout, stderr io.Writer) error {
        if len(args) == 0 {
                usage(stderr)
                return errUsage
        }
        switch name, rest := args[0], args[1:]; name {
        case "-h", "--help", "help":
                usage(stdout)
                return nil
        case cmdList:
                return runList(rest, stdout, stderr)
        case cmdCheck:
                return runCheck(rest, stdout, stderr)
        case cmdParseTidyLog:
                return runParseTidyLog(rest, stdout, stderr)
        default:
                _, _ = fmt.Fprintf(stderr, "cpp-lint: unknown subcommand %q\n\n", name)
                usage(stderr)
                return errUsage
        }
}

// errUsage maps to exit code 2, matching the Go `flag` package's
// convention so wrappers can distinguish "you used it wrong" from a
// real lint failure (exit 1).
//
// It is returned for a missing or unknown subcommand, for any error
// from a subcommand's flag parsing, and for unexpected positional
// arguments. Because every Parse error is mapped here, a subcommand's
// `-h` (which the flag package reports as flag.ErrHelp) also ends up
// as errUsage.
var errUsage = errors.New("usage error")

// errFindings signals that the lint run completed cleanly but at
// least one rule reported a finding. Wrappers exit 1 on this so CI
// can tell rule violations apart from infrastructure failures.
//
// runCheck prints the finding count before returning it, so main exits
// without printing the error text again.
var errFindings = errors.New("lint findings")

// usage writes the top-level help text (tool summary plus the list of
// subcommands) to w. Write errors are deliberately ignored: there is
// nothing useful to do when the help text cannot be printed.
func usage(w io.Writer) {
        const helpText = `cpp-lint - first-party C++ source-only lint runner.

Subcommands:
  list            Print the canonical first-party C++ source list (one path per line).
  check           Run the source-only checks (file size, banned logging, status misuse).
  parse-tidy-log  Parse lint-cpp-tidy.log into CSV + triage markdown.

Run "cpp-lint <subcommand> -h" for per-subcommand flags.
`
        _, _ = fmt.Fprint(w, helpText)
}

// flagSet builds a FlagSet that prints its usage to stderr and stops
// on the first error. Centralising this keeps every subcommand
// behaving the same way (and keeps tests from being polluted by the
// stdlib's default ExitOnError behaviour).
//
// name is the subcommand name used in the flag package's messages.
// The set is created with flag.ContinueOnError, so Parse hands the
// error back to the caller instead of terminating the process.
func flagSet(name string, stderr io.Writer) *flag.FlagSet {
        fs := flag.NewFlagSet(name, flag.ContinueOnError)
        fs.SetOutput(stderr)
        return fs
}

// main runs the CLI and translates run's error into a process exit
// code: 0 on success, 2 for errUsage, 1 for everything else. Only
// errors other than errUsage and errFindings are printed here, because
// those two have already been reported by the code that returned them.
func main() {
        err := run(os.Args[1:], os.Stdout, os.Stderr)
        if err == nil {
                return
        }
        if errors.Is(err, errUsage) {
                os.Exit(2)
        }
        if !errors.Is(err, errFindings) {
                _, _ = fmt.Fprintf(os.Stderr, "cpp-lint: %v\n", err)
        }
        os.Exit(1)
}

package main

import (
        "bufio"
        "fmt"
        "io"
        "os"
        "os/exec"
        "path/filepath"
        "sort"
        "strings"
)

// firstPartyIncludeRoots names the only directories that contain
// hand-written, repo-owned C++ that the lint stack is allowed to
// touch. Every other root is either vendored upstream code,
// generated output, a Bazel cache, or build / ops artifacts that
// must not be reformatted by us.
//
// The list is intentionally short and explicit: adding a new
// first-party C++ tree should be a deliberate edit here, not a side
// effect of someone dropping a `.cc` file into an unrelated
// directory.
//
// Entries are repo-relative and compared with a plain string-prefix
// test (see underAnyPrefix), so each one must keep its trailing `/`.
var firstPartyIncludeRoots = []string{
        "backend/",
        "binaries/",
        "frontend/",
        "tools/googlesql-prebuilt/smoke/",
}

// firstPartyExcludePrefixes lists path prefixes that look first
// party at a glance but are either generated or wrappers around
// upstream code. They are excluded after the include filter so a
// future first-party file under one of the parent directories does
// not accidentally inherit the exclusion.
//
//   - `tools/googlesql-prebuilt/templates/` is `cc_library` glue we
//     stamp into the prebuilt artifact's BUILD file; the C++ headers
//     under it shadow GoogleSQL surface types and follow upstream
//     style.
//
// Individual generated files are not listed here; they are excluded
// by exact path through firstPartyExcludePaths. For example
// `binaries/emulator_main/version.cc` is genrule output that
// `version_gen.sh` writes from `version.cc.tmpl`. The template
// itself stays in the source list because we hand-write it.
var firstPartyExcludePrefixes = []string{
        "tools/googlesql-prebuilt/templates/",
}

// firstPartyExcludePaths lists individual files that match the
// include filter but must never be linted. Generated artifacts and
// Bazel-stamped outputs go here.
//
// Keys are repo-relative, slash-separated paths; filterFirstParty
// looks each normalised path up verbatim, so an entry only excludes
// that exact file.
var firstPartyExcludePaths = map[string]struct{}{
        // The genrule output for `binaries/emulator_main:version_cc`.
        // Bazel may stage it under `bazel-out/`, but a stray symlink
        // inside the worktree (or a `bazel run` artifact) must not
        // pull it into the lint set.
        "binaries/emulator_main/version.cc": {},
}

// firstPartyExtensions lists the file extensions we treat as C++
// sources for lint purposes. Headers and source files are listed
// together because clang-format, clang-tidy, and cppcheck all
// expect both to share a single configuration set, and the
// source-only checks (file size, banned logging) apply uniformly.
//
// hasCPPExtension matches these as case-sensitive path suffixes, so
// each entry includes its leading dot.
var firstPartyExtensions = []string{".cc", ".cpp", ".cxx", ".h", ".hpp", ".hh"}

// SourceLister returns the set of tracked C++ files we own. The
// real implementation shells out to `git ls-files`; tests provide a
// fixture-driven stub via the same interface so they do not need a
// live git repo.
//
// List returns the file paths, or a non-nil error when the set could
// not be enumerated.
type SourceLister interface {
        List() ([]string, error)
}

// gitSourceLister is the production SourceLister. It uses
// `git ls-files` so that .gitignore and the working tree state
// determine the answer — not a brittle filesystem walk that would
// pick up Bazel symlinks (`bazel-*`), the `.cache/` tree, or
// deleted-but-still-on-disk files.
//
// newGitSourceLister fills repoRoot by asking git for the enclosing
// repo; readSources builds the struct directly with the root it
// already resolved.
type gitSourceLister struct {
        // repoRoot is passed to `git -C` so the lister works from any
        // subdirectory and from inside test temp dirs.
        repoRoot string
}

// newGitSourceLister builds a lister for the git repo that encloses
// the current working directory. The root is resolved up front via
// repoRoot, so running outside a checkout fails here rather than on
// the first List call.
func newGitSourceLister() (*gitSourceLister, error) {
        lister := new(gitSourceLister)
        var err error
        if lister.repoRoot, err = repoRoot(); err != nil {
                return nil, err
        }
        return lister, nil
}

// List returns the first-party C++ files tracked by git under the
// lister's repo root, as repo-relative paths.
//
// The raw `git ls-files` output is narrowed by filterFirstParty, which
// also sorts the result, so callers see a deterministic order that
// does not depend on git's own output order. An error from git is
// returned unchanged with a nil slice.
func (g *gitSourceLister) List() ([]string, error) {
        tracked, lsErr := gitLsFiles(g.repoRoot)
        if lsErr != nil {
                return nil, lsErr
        }
        owned := filterFirstParty(tracked)
        return owned, nil
}

// repoRoot asks git for the top-level directory of the repo that
// contains the current working directory and returns it with
// surrounding whitespace removed.
//
// `git rev-parse --show-toplevel` is used instead of searching parent
// directories for `.git`, because inside a git worktree `.git` is a
// regular file rather than a directory. A failing git invocation is
// returned wrapped as "git rev-parse: ...".
func repoRoot() (string, error) {
        raw, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
        if err != nil {
                return "", fmt.Errorf("git rev-parse: %w", err)
        }
        top := strings.TrimSpace(string(raw))
        return top, nil
}

// gitLsFiles runs `git -C dir ls-files` and returns one entry per
// non-empty output line, trimmed of surrounding whitespace.
//
// No pathspec is passed on purpose: a `.cc` file under `third_party/`
// is tracked too, and the include / exclude lists in this file are
// meant to be the only definition of "first party". Letting git filter
// by glob would allow a pattern bug to pull in vendored code unnoticed.
//
// Errors from running git or from scanning its output are wrapped and
// returned with a nil slice.
func gitLsFiles(dir string) ([]string, error) {
        // #nosec G204 -- 'git' is a fixed binary; dir is the lint tool's
        // repo root.
        raw, err := exec.Command("git", "-C", dir, "ls-files").Output()
        if err != nil {
                return nil, fmt.Errorf("git ls-files: %w", err)
        }

        sc := bufio.NewScanner(strings.NewReader(string(raw)))
        // Start with a 64 KiB buffer and allow lines of up to 1 MiB.
        sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)

        var tracked []string
        for sc.Scan() {
                if name := strings.TrimSpace(sc.Text()); name != "" {
                        tracked = append(tracked, name)
                }
        }
        if scanErr := sc.Err(); scanErr != nil {
                return nil, fmt.Errorf("scan ls-files: %w", scanErr)
        }
        return tracked, nil
}

// filterFirstParty applies the include / exclude rules to a raw
// `git ls-files` output. It is the single function tests pin so
// the ownership boundary cannot drift silently.
func filterFirstParty(all []string) []string {
        var out []string
        for _, p := range all {
                // Normalise forward slashes early. `git ls-files` always
                // uses `/` on every platform git supports, but a stray
                // backslash from a hand-built test fixture or a Windows
                // host should still route through the include filter,
                // so replace backslashes directly. (filepath.ToSlash is
                // a no-op on linux.)
                p = strings.ReplaceAll(p, `\`, "/")
                if !hasCPPExtension(p) {
                        continue
                }
                if !underAnyPrefix(p, firstPartyIncludeRoots) {
                        continue
                }
                if underAnyPrefix(p, firstPartyExcludePrefixes) {
                        continue
                }
                if _, dropped := firstPartyExcludePaths[p]; dropped {
                        continue
                }
                out = append(out, p)
        }
        sort.Strings(out)
        return out
}

// hasCPPExtension reports whether path ends in one of
// firstPartyExtensions. The comparison is case-sensitive on purpose:
// first-party files in this repo use lowercase extensions, and
// clang-format / clang-tidy follow the same convention.
func hasCPPExtension(path string) bool {
        matched := false
        for i := 0; i < len(firstPartyExtensions) && !matched; i++ {
                matched = strings.HasSuffix(path, firstPartyExtensions[i])
        }
        return matched
}

// underAnyPrefix reports whether path starts with at least one of the
// given directory prefixes. The test is a plain string-prefix match,
// so callers must pass prefixes that end in `/`; otherwise `backend`
// would also claim a sibling directory such as `backend2/`.
func underAnyPrefix(path string, prefixes []string) bool {
        matched := false
        for i := 0; i < len(prefixes) && !matched; i++ {
                matched = strings.HasPrefix(path, prefixes[i])
        }
        return matched
}

// IsTestFile reports whether path names a first-party C++ test, i.e.
// its base name ends in `_test.cc` or `_test.cpp`. The repo keeps
// tests as `*_test.cc` next to the implementation file, as googletest
// recommends; there are no test-only headers today.
//
// Rules that relax for tests (for example allowing `std::cout` in
// fixture printers) call this helper instead of keeping a path list,
// so a test added in a new directory is recognised without any edit
// here.
func IsTestFile(path string) bool {
        name := filepath.Base(path)
        for _, suffix := range [...]string{"_test.cc", "_test.cpp"} {
                if strings.HasSuffix(name, suffix) {
                        return true
                }
        }
        return false
}

// IsClangTidyTranslationUnit reports whether path ends in `.cc`,
// `.cpp` or `.cxx`, i.e. is a source file clang-tidy should analyze as
// a standalone translation unit.
//
// Headers are left out because `compile_commands.json` lists only
// source files: linting a header synthesizes a TU without Bazel's
// include paths and produces bogus `clang-diagnostic-error` findings
// (notably on `*_test_fixture.h` and other header-only helpers).
func IsClangTidyTranslationUnit(path string) bool {
        switch {
        case strings.HasSuffix(path, ".cc"),
                strings.HasSuffix(path, ".cpp"),
                strings.HasSuffix(path, ".cxx"):
                return true
        default:
                return false
        }
}

// runList is the `cpp-lint list` subcommand. It prints the
// first-party C++ source list, one path per line, suitable for
// piping into `xargs clang-format`, `xargs clang-tidy`, or any
// other downstream tool.
//
// Paths that git tracks but that no longer exist on disk are dropped
// before printing. A flag-parse failure or any positional argument
// yields errUsage; errors from discovering the repo or listing its
// files are returned unchanged.
//
// Flags:
//   - `--tests` (default true) keeps test files in the output; pass
//     `--tests=false` to drop everything IsTestFile recognises.
//   - `--tidy` restricts the output to translation units accepted by
//     IsClangTidyTranslationUnit.
func runList(args []string, stdout, stderr io.Writer) error {
        fs := flagSet("list", stderr)
        withTests := fs.Bool("tests", true, "include *_test.cc files in the output")
        tidyOnly := fs.Bool("tidy", false, "emit only .cc/.cpp/.cxx translation units (for clang-tidy)")
        if err := fs.Parse(args); err != nil {
                return errUsage
        }
        if fs.NArg() != 0 {
                _, _ = fmt.Fprintln(stderr, "cpp-lint list: takes no positional arguments")
                return errUsage
        }
        lister, err := newGitSourceLister()
        if err != nil {
                return err
        }
        files, err := lister.List()
        if err != nil {
                return err
        }
        // Resolve on-disk paths against the root the lister already
        // discovered: this avoids a second `git rev-parse` subprocess and
        // guarantees listing and existence checks agree on the root.
        files = filterExistingOnDisk(lister.repoRoot, files)
        for _, f := range files {
                if !*withTests && IsTestFile(f) {
                        continue
                }
                if *tidyOnly && !IsClangTidyTranslationUnit(f) {
                        continue
                }
                _, _ = fmt.Fprintln(stdout, f)
        }
        return nil
}

// readSources returns the first-party source list together with the
// repo root it was computed against, so callers can resolve the
// relative paths against that same root.
//
// The root comes from currentRepoRoot when a test has pinned one via
// setRepoRoot, and from repoRoot (git) otherwise. Subcommands call
// this helper instead of building a lister themselves so the test
// suite has a single seam to swap in fixtures. Tracked files that are
// missing on disk are filtered out of the result.
//
// On error the slice is nil and the root is the empty string.
func readSources() ([]string, string, error) {
        root := currentRepoRoot()
        if root == "" {
                var err error
                if root, err = repoRoot(); err != nil {
                        return nil, "", err
                }
        }
        tracked, err := (&gitSourceLister{repoRoot: root}).List()
        if err != nil {
                return nil, "", err
        }
        return filterExistingOnDisk(root, tracked), root, nil
}

// filterExistingOnDisk keeps only those repo-relative paths for which
// os.Stat succeeds under root, preserving their order. This removes
// tracked files that are gone from the working tree (for example
// after a split where old monoliths were deleted but `git rm` has not
// landed yet); downstream tools such as clang-format would fail on
// the missing paths anyway. Any Stat error, not just "not found",
// drops the path.
func filterExistingOnDisk(root string, files []string) []string {
        present := make([]string, 0, len(files))
        for _, rel := range files {
                _, statErr := os.Stat(resolveAgainstRoot(root, rel))
                if statErr != nil {
                        continue
                }
                present = append(present, rel)
        }
        return present
}

// testRepoRoot is a test seam. Tests set it via setRepoRoot to
// pin the lister at a fixture worktree without exporting the
// internal type. It is read only through currentRepoRoot, which
// readSources() consults before falling back to git-based discovery,
// so production code paths stay unaffected when it is empty.
var testRepoRoot string //nolint:gochecknoglobals // test seam, see setRepoRoot

// currentRepoRoot returns the pinned test repo root, or the empty
// string when no test has set one.
func currentRepoRoot() string { return testRepoRoot }

// setRepoRoot pins the test-only repo root. It returns a cleanup
// function so test cases can use `defer setRepoRoot(dir)()` to
// restore the previous value (always the empty string in
// practice).
func setRepoRoot(dir string) func() {
        // Remember the value being replaced so nested overrides unwind
        // correctly.
        prev := testRepoRoot
        testRepoRoot = dir
        return func() { testRepoRoot = prev }
}

// resolveAgainstRoot turns a repo-relative, slash-separated path into
// a filesystem path under root, converting the separators to the
// host's convention first.
//
// The source lister only ever hands out repo-relative paths because
// that is what downstream consumers expect; the root is spliced back
// in only when a check has to open the file from disk.
func resolveAgainstRoot(root, rel string) string {
        native := filepath.FromSlash(rel)
        return filepath.Join(root, native)
}

// readFile returns the file contents at path. Centralised so the
// check helpers do not each grow their own ioutil-style boilerplate
// (and so a future swap to memory-mapped reads has a single seam).
//
// It is a direct wrapper around os.ReadFile: the whole file is read
// into memory and any error is returned unchanged.
func readFile(path string) ([]byte, error) {
        //nolint:gosec // Paths come from the first-party source lister, which is itself tested.
        return os.ReadFile(path)
}

package main

import (
        "encoding/csv"
        "fmt"
        "io"
        "os"
        "path/filepath"
        "regexp"
        "strconv"
        "strings"
)

// tidyFinding is one clang-tidy diagnostic extracted from a batch log.
//
// findingsForFailure fills it as follows: BlockFile is the path from
// the `FAILED:` line the diagnostic was attributed to; File is the
// diagnostic's own path after normalizeTidyPath (or BlockFile when an
// external path was re-attributed); Line and Column come from the
// diagnostic header; Severity is "warning" or "error" (notes are
// skipped); Check is the first name inside the trailing `[...]`;
// Symbol and ComplexityScore are set only when the message matches
// the cognitive-complexity pattern.
type tidyFinding struct {
        BlockFile       string // first-party file block being linted
        File            string // diagnostic path (normalized when possible)
        Line            int
        Column          int
        Severity        string
        Check           string
        Symbol          string
        Message         string
        ComplexityScore int // 0 when not a complexity finding
}

// Shared string values for the triage classification.
//
// fileKindTest and fileKindFixture are two of the kinds returned by
// fileKind; dispositionFor compares against them to decide whether a
// complexity-only file gets a NOLINT disposition. dispFix is the
// "fix" disposition dispositionFor returns.
const (
        fileKindTest    = "test"
        fileKindFixture = "fixture"
        dispFix         = "fix"
)

// Line-level patterns for the clang-tidy batch log. Each is applied
// to a single log line (the log is split on "\n" before matching).
//
//   - reFileBlock: a `========== <text> ==========` banner line;
//     parseTidyLog counts these as blocks.
//   - reFailed: a `FAILED: <path>` line; group 1 is the path.
//   - reProcessing: an `Error while processing <path>.` line; group 1
//     is the path without the trailing period.
//   - reDiagnostic: a diagnostic header. Groups are 1 path, 2 line,
//     3 column, 4 severity, 5 message, 6 the optional bracketed check
//     list (empty when absent).
//   - reComplexity: the cognitive-complexity message; group 1 is the
//     function name and group 2 the score.
var (
        reFileBlock  = regexp.MustCompile(`^========== (.+) ==========$`)
        reFailed     = regexp.MustCompile(`^FAILED: (.+)$`)
        reProcessing = regexp.MustCompile(`^Error while processing (.+)\.$`)
        // path:line:col: severity: message [check-name,...]
        reDiagnostic = regexp.MustCompile(`^(.+?):(\d+):(\d+): (warning|error|note): (.+?)(?: \[(.+?)\])?$`)
        reComplexity = regexp.MustCompile(`function '([^']+)' has cognitive complexity of (\d+)`)
)

// runParseTidyLog implements the `cpp-lint parse-tidy-log` subcommand.
//
// It opens the clang-tidy batch log, parses it with parseTidyLog, then
// writes the findings as CSV and as a triage markdown document before
// printing a summary to stdout. A flag-parse failure yields errUsage;
// failing to open the log is wrapped as "open log: ..."; errors from
// the two writers are returned unchanged, and the markdown is not
// attempted when the CSV write fails.
//
// Flags:
//   - `--log` is the input log path (default lint-cpp-tidy.log).
//   - `--csv` is the CSV output path (default lint-cpp-tidy.csv).
//   - `--markdown` is the triage document path
//     (default docs/dev/cpp-lint-tidy-triage.md).
func runParseTidyLog(args []string, stdout, stderr io.Writer) error {
        flags := flagSet(cmdParseTidyLog, stderr)
        logPath := flags.String("log", "lint-cpp-tidy.log", "clang-tidy batch log path")
        csvPath := flags.String("csv", "lint-cpp-tidy.csv", "CSV output path")
        mdPath := flags.String("markdown", "docs/dev/cpp-lint-tidy-triage.md", "triage markdown output path")
        if parseErr := flags.Parse(args); parseErr != nil {
                return errUsage
        }

        logFile, err := os.Open(*logPath)
        if err != nil {
                return fmt.Errorf("open log: %w", err)
        }
        defer func() { _ = logFile.Close() }()

        findings, failed, blocks := parseTidyLog(logFile)
        err = writeTidyCSV(*csvPath, findings)
        if err == nil {
                err = writeTriageMarkdown(*mdPath, findings, failed, blocks)
        }
        if err != nil {
                return err
        }
        printTidySummary(stdout, findings, failed, blocks, *csvPath, *mdPath)
        return nil
}

// parseTidyLog reads a whole clang-tidy batch log from r and returns
// the extracted findings, the set of paths named on `FAILED:` lines,
// and the number of file-block banner lines.
//
// The log is processed line by line. Every `FAILED: <path>` line
// closes a window that begins right after the previous `FAILED:` line
// (or at the top of the log) and includes the `FAILED:` line itself;
// findingsForFailure turns that window into findings for the failed
// path. Lines after the last `FAILED:` line contribute no findings.
//
// If r cannot be read, all three results are zero values.
func parseTidyLog(r io.Reader) ([]tidyFinding, map[string]struct{}, int) {
        raw, err := io.ReadAll(r)
        if err != nil {
                return nil, nil, 0
        }
        lines := strings.Split(string(raw), "\n")

        var (
                findings    []tidyFinding
                blocks      int
                windowStart int
        )
        failed := make(map[string]struct{})
        for i, line := range lines {
                // A banner can never also be a FAILED line, so it is safe
                // to move on as soon as one is counted.
                if reFileBlock.MatchString(line) {
                        blocks++
                        continue
                }
                m := reFailed.FindStringSubmatch(line)
                if m == nil {
                        continue
                }
                tu := strings.TrimSpace(m[1])
                failed[tu] = struct{}{}
                findings = append(findings, findingsForFailure(lines[windowStart:i+1], tu)...)
                windowStart = i + 1
        }
        return findings, failed, blocks
}

// findingsForFailure attributes diagnostics in a FAILED window onto the failed TU.
//
// window is the slice of log lines that ends with the `FAILED:` line
// for failedPath. The function scans it for diagnostic headers and
// keeps those that can be tied to failedPath; every kept finding gets
// BlockFile set to failedPath. `note` diagnostics and diagnostics
// without a bracketed check name are ignored. The result is
// de-duplicated via dedupeFindings.
func findingsForFailure(window []string, failedPath string) []tidyFinding {
        var out []tidyFinding
        // processingTU tracks the TU named by the most recent
        // "Error while processing" line; until one is seen it is
        // assumed to be the failed file itself.
        processingTU := failedPath
        // First pass: does any line in the window mention
        // "<failedPath>:" at all? This gates the re-attribution of
        // external diagnostics below.
        windowMentionsFailed := false
        for _, line := range window {
                if strings.Contains(line, failedPath+":") {
                        windowMentionsFailed = true
                }
        }
        for _, line := range window {
                if m := reProcessing.FindStringSubmatch(line); m != nil {
                        processingTU = normalizeTidyPath(m[1], failedPath)
                }
                m := reDiagnostic.FindStringSubmatch(line)
                if m == nil {
                        continue
                }
                severity := m[4]
                if severity == "note" {
                        continue
                }
                // Group 6 is empty when the diagnostic has no trailing
                // `[check]` list; such lines are not counted as findings.
                checkRaw := m[6]
                if checkRaw == "" {
                        continue
                }
                // Keep only the first name of a comma-separated list such
                // as `[check-name,-warnings-as-errors]`.
                check := strings.Split(checkRaw, ",")[0]
                check = strings.TrimPrefix(check, "-warnings-as-errors")

                rawPath := m[1]
                file := normalizeTidyPath(rawPath, processingTU)
                // Decide whether this diagnostic belongs to failedPath.
                // The case order matters: the first matching case wins.
                switch {
                case file == failedPath, strings.HasPrefix(line, failedPath+":"):
                        // direct hit
                case isExternalTidyPath(rawPath) && windowMentionsFailed:
                        // External / generated path in a window that does
                        // mention the failed file: charge it to that file.
                        file = failedPath
                case processingTU == failedPath:
                        // e.g. missing-header errors while linting a header TU
                default:
                        // Belongs to some other file; not part of this failure.
                        continue
                }

                // reDiagnostic only captures digit runs here, so Atoi can
                // fail only on overflow, in which case the value stays 0.
                lineNum, _ := strconv.Atoi(m[2])
                colNum, _ := strconv.Atoi(m[3])
                msg := m[5]
                symbol := ""
                complexity := 0
                if cm := reComplexity.FindStringSubmatch(msg); cm != nil {
                        symbol = cm[1]
                        complexity, _ = strconv.Atoi(cm[2])
                }
                out = append(out, tidyFinding{
                        BlockFile:       failedPath,
                        File:            file,
                        Line:            lineNum,
                        Column:          colNum,
                        Severity:        severity,
                        Check:           check,
                        Symbol:          symbol,
                        Message:         msg,
                        ComplexityScore: complexity,
                })
        }
        return dedupeFindings(out)
}

// isExternalTidyPath reports whether a raw diagnostic path points
// outside the first-party tree: it starts with `bazel-out/` or
// `external/`, or contains `/external/googlesql` anywhere.
func isExternalTidyPath(raw string) bool {
        for _, prefix := range [...]string{"bazel-out/", "external/"} {
                if strings.HasPrefix(raw, prefix) {
                        return true
                }
        }
        return strings.Contains(raw, "/external/googlesql")
}

// dedupeFindings returns the findings of `in` with duplicates removed,
// keeping the first occurrence and the original order. Two findings
// are duplicates when File, Line, Column, Check and Message all agree;
// the remaining fields are not compared. The result is nil when `in`
// is empty.
func dedupeFindings(in []tidyFinding) []tidyFinding {
        var unique []tidyFinding
        seen := map[string]bool{}
        for i := range in {
                f := in[i]
                key := fmt.Sprintf("%s:%d:%d:%s:%s", f.File, f.Line, f.Column, f.Check, f.Message)
                if seen[key] {
                        continue
                }
                seen[key] = true
                unique = append(unique, f)
        }
        return unique
}

// normalizeTidyPath maps a clang-tidy diagnostic path onto a
// first-party, repo-relative path where it can, and onto blockFile
// where it cannot, so triage stays one row per `FAILED:` entry.
//
// After trimming whitespace and a leading `./`:
//   - an absolute path containing `/backend/` is cut to start at
//     `backend/`;
//   - any other absolute path containing `bigquery-emulator/` is cut
//     to whatever follows the first occurrence of that marker;
//   - remaining absolute paths, and relative paths under `external/`
//     or `bazel-out/`, are replaced by blockFile;
//   - every other relative path is returned as-is.
func normalizeTidyPath(raw, blockFile string) string {
        p := strings.TrimPrefix(strings.TrimSpace(raw), "./")
        switch {
        case strings.HasPrefix(p, "/"):
                if at := strings.Index(p, "/backend/"); at >= 0 {
                        // Skip the slash itself so the result is relative.
                        return p[at+1:]
                }
                if _, rel, found := strings.Cut(p, "bigquery-emulator/"); found {
                        return rel
                }
                return blockFile
        case strings.HasPrefix(p, "external/"), strings.HasPrefix(p, "bazel-out/"):
                return blockFile
        default:
                return p
        }
}

// writeTidyCSV writes findings to path as CSV: a header row followed
// by one row per finding, in the given order. The parent directory is
// created when missing and an existing file is truncated.
//
// Every failure is returned wrapped with the step that failed. That
// includes a failing Close: for a freshly written file a close error
// can mean the data never reached the disk, so it is reported unless
// an earlier error is already being returned.
func writeTidyCSV(path string, findings []tidyFinding) (err error) {
        // MkdirAll succeeds when the directory already exists (including
        // "." for a bare file name), so any error here is a real one.
        //nolint:gosec // Output path comes from task wrapper defaults or explicit flags.
        if mkErr := os.MkdirAll(filepath.Dir(path), 0o755); mkErr != nil {
                return fmt.Errorf("mkdir csv dir: %w", mkErr)
        }
        //nolint:gosec // Output path comes from task wrapper defaults or explicit flags.
        out, err := os.Create(path)
        if err != nil {
                return fmt.Errorf("create csv: %w", err)
        }
        defer func() {
                if closeErr := out.Close(); closeErr != nil && err == nil {
                        err = fmt.Errorf("close csv: %w", closeErr)
                }
        }()

        w := csv.NewWriter(out)
        header := []string{
                "block_file", "file", "line", "column", "severity", "check", "symbol", "message", "complexity_score",
        }
        if writeErr := w.Write(header); writeErr != nil {
                return fmt.Errorf("write csv header: %w", writeErr)
        }
        for _, f := range findings {
                row := []string{
                        f.BlockFile,
                        f.File,
                        strconv.Itoa(f.Line),
                        strconv.Itoa(f.Column),
                        f.Severity,
                        f.Check,
                        f.Symbol,
                        f.Message,
                        strconv.Itoa(f.ComplexityScore),
                }
                if writeErr := w.Write(row); writeErr != nil {
                        return fmt.Errorf("write csv row: %w", writeErr)
                }
        }
        // The csv.Writer buffers; Flush pushes the rows to the file and
        // Error reports anything that went wrong while doing so.
        w.Flush()
        if flushErr := w.Error(); flushErr != nil {
                return fmt.Errorf("flush csv: %w", flushErr)
        }
        return nil
}

package main

import (
        "fmt"
        "io"
        "os"
        "path/filepath"
        "sort"
        "strings"
)

// fileSummary aggregates the findings attributed to one failed file.
//
// summarizeByFile creates one per entry in the failed-file set: File
// is that path, Findings holds the attributed findings in input
// order, WorstComplexity is the highest ComplexityScore among them
// (0 when none carries a score), and Checks counts findings per check
// name.
type fileSummary struct {
        File            string
        Findings        []tidyFinding
        WorstComplexity int
        Checks          map[string]int
}

// failedFileForFinding picks the failed file a finding should be
// filed under: the finding's own File when that is in failedFiles,
// otherwise its BlockFile when that is. It returns "" when neither is
// a known failed file.
func failedFileForFinding(f tidyFinding, failedFiles map[string]struct{}) string {
        for _, candidate := range [...]string{f.File, f.BlockFile} {
                if _, known := failedFiles[candidate]; known {
                        return candidate
                }
        }
        return ""
}

// summarizeByFile groups findings by failed file and returns one
// fileSummary per entry of failedFiles, sorted by path. Files without
// any attributed finding still get an (empty) summary; findings that
// failedFileForFinding cannot tie to a failed file are left out.
func summarizeByFile(findings []tidyFinding, failedFiles map[string]struct{}) []fileSummary {
        index := make(map[string]*fileSummary, len(failedFiles))
        for path := range failedFiles {
                index[path] = &fileSummary{File: path, Checks: make(map[string]int)}
        }

        for i := range findings {
                f := findings[i]
                owner := failedFileForFinding(f, failedFiles)
                if owner == "" {
                        continue
                }
                // owner always comes from failedFiles, so the entry exists.
                entry := index[owner]
                entry.Findings = append(entry.Findings, f)
                entry.Checks[f.Check]++
                entry.WorstComplexity = max(entry.WorstComplexity, f.ComplexityScore)
        }

        summaries := make([]fileSummary, 0, len(index))
        for _, entry := range index {
                summaries = append(summaries, *entry)
        }
        sort.Slice(summaries, func(a, b int) bool { return summaries[a].File < summaries[b].File })
        return summaries
}

// subsystemFor maps a repo-relative path to the subsystem label used
// for triage grouping. The label is chosen by the first matching
// directory prefix; paths that match none are labelled "other".
func subsystemFor(path string) string {
        rules := [...]struct{ prefix, label string }{
                {"backend/catalog/", "catalog"},
                {"backend/engine/control/", "control"},
                {"backend/engine/coordinator/", "coordinator"},
                {"backend/engine/duckdb/", "duckdb"},
                {"backend/engine/semantic/", "semantic"},
                {"backend/storage/", "storage"},
                {"backend/schema/", "schema"},
                {"backend/sqltools/", "sqltools"},
                {"frontend/handlers/", "frontend"},
                {"binaries/", "binaries"},
                {"tools/googlesql-prebuilt/smoke/", "smoke"},
        }
        for _, rule := range rules {
                if strings.HasPrefix(path, rule.prefix) {
                        return rule.label
                }
        }
        return "other"
}

// fileKind classifies a source file by the suffix of its base name: test
// translation units, test fixtures / internal headers, ordinary headers, and
// everything else ("production"). More specific suffixes are checked first so
// that e.g. "_internal.h" is not reported as a plain header.
func fileKind(path string) string {
        name := filepath.Base(path)
        endsWith := func(suffixes ...string) bool {
                for _, sfx := range suffixes {
                        if strings.HasSuffix(name, sfx) {
                                return true
                        }
                }
                return false
        }

        if endsWith("_test.cc") {
                return fileKindTest
        }
        if endsWith("_test_fixture.h", "_internal.h") {
                return fileKindFixture
        }
        if endsWith(".h", ".hpp") {
                return "header"
        }
        return "production"
}

// dispositionFor picks the remediation disposition and wave for one failed
// file.
//
// Files with any finding other than the two complexity/size checks are
// handled first: "autofix" in wave 0 if a modernize-* check is present,
// otherwise a manual fix in wave 3. Complexity-only files are bucketed by
// their worst score: >50 is wave 1, 35..50 is wave 2, and everything below
// lands in wave 4, where tests and fixtures get a NOLINT instead of a fix.
func dispositionFor(s fileSummary) (disp string, wave int) {
        nonComplexity, modernize := false, false
        for name := range s.Checks {
                switch name {
                case "readability-function-cognitive-complexity", "readability-function-size":
                        // Complexity-only checks never trigger the non-complexity path.
                        continue
                }
                nonComplexity = true
                if strings.HasPrefix(name, "modernize-") {
                        modernize = true
                }
        }
        if modernize {
                return "autofix", 0
        }
        if nonComplexity {
                return dispFix, 3
        }

        if s.WorstComplexity > 50 {
                return dispFix, 1
        }
        if s.WorstComplexity >= 35 {
                return dispFix, 2
        }
        if kind := fileKind(s.File); kind == fileKindTest || kind == fileKindFixture {
                return "NOLINT", 4
        }
        return dispFix, 4
}

// writeTriageMarkdown renders the triage document for the given findings and
// writes it to path, creating the parent directory if needed.
//
// Both failure modes are wrapped with the step that failed (mkdir vs. write)
// so the caller's message says which one went wrong; the underlying error
// stays reachable through errors.Is / errors.As.
func writeTriageMarkdown(path string, findings []tidyFinding, failedFiles map[string]struct{}, totalBlocks int) error {
        //nolint:gosec // Output path comes from task wrapper defaults or explicit flags.
        if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
                return fmt.Errorf("mkdir triage doc: %w", err)
        }
        content := buildTriageMarkdown(findings, failedFiles, totalBlocks)
        //nolint:gosec // Generated triage doc is intentionally world-readable.
        if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
                return fmt.Errorf("write triage doc: %w", err)
        }
        return nil
}

// buildTriageMarkdown assembles the full triage document as a string: the
// summary tables, the per-file disposition matrix, and the static list of
// remediation waves, in that order.
func buildTriageMarkdown(findings []tidyFinding, failedFiles map[string]struct{}, totalBlocks int) string {
        fileRows := summarizeByFile(findings, failedFiles)

        // Per-check totals plus the number of findings that carry a
        // complexity score.
        perCheck := make(map[string]int)
        var scored int
        for i := range findings {
                perCheck[findings[i].Check]++
                if findings[i].ComplexityScore > 0 {
                        scored++
                }
        }

        var doc strings.Builder
        writeTriageSummary(&doc, fileRows, findings, perCheck, scored, totalBlocks)
        writeTriageMatrix(&doc, fileRows)
        writeTriageWaves(&doc)
        return doc.String()
}

// writeTriageSummary appends the document header and the "Summary" section to
// b: headline metrics, findings grouped by check, and failed files grouped by
// subsystem. Both grouped tables are ordered by sortedKeys.
func writeTriageSummary(
        b *strings.Builder,
        summaries []fileSummary,
        findings []tidyFinding,
        checkCounts map[string]int,
        complexityCount, totalBlocks int,
) {
        // Tally failed files per subsystem before emitting anything.
        perSubsystem := make(map[string]int)
        for i := range summaries {
                perSubsystem[subsystemFor(summaries[i].File)]++
        }

        b.WriteString("# clang-tidy triage (first-party C++)\n\n")
        b.WriteString("Generated by `task lint:cpp:tidy-report` from `lint-cpp-tidy.log`.\n\n")

        b.WriteString("## Summary\n\n")
        b.WriteString("| Metric | Value |\n|---|---|\n")
        _, _ = fmt.Fprintf(b, "| Files scanned | %d |\n", totalBlocks)
        _, _ = fmt.Fprintf(b, "| Files failed | %d |\n", len(summaries))
        _, _ = fmt.Fprintf(b, "| Total findings | %d |\n", len(findings))
        _, _ = fmt.Fprintf(b, "| Cognitive-complexity findings | %d |\n\n", complexityCount)

        b.WriteString("**Check mix:** Failures are **not** complexity-only. ")
        b.WriteString(
                "Cognitive-complexity is the largest bucket, but analyzer and cppcoreguidelines checks also fail the lane.\n\n",
        )

        b.WriteString("### Findings by check\n\n")
        b.WriteString("| Check | Count |\n|---|---|\n")
        for _, name := range sortedKeys(checkCounts) {
                count := checkCounts[name]
                _, _ = fmt.Fprintf(b, "| `%s` | %d |\n", name, count)
        }

        b.WriteString("\n### Failed files by subsystem\n\n")
        b.WriteString("| Subsystem | Files |\n|---|---|\n")
        for _, name := range sortedKeys(perSubsystem) {
                count := perSubsystem[name]
                _, _ = fmt.Fprintf(b, "| %s | %d |\n", name, count)
        }
}

// writeTriageMatrix appends the "Disposition matrix" table to b, one row per
// failed file. The Notes column lists the file's unique symbols and is capped
// at 80 bytes, with the tail replaced by "..." when it is longer.
func writeTriageMatrix(b *strings.Builder, summaries []fileSummary) {
        const maxNotes = 80

        b.WriteString("\n## Disposition matrix\n\n")
        b.WriteString("| File | Findings | Checks | Worst complexity | Subsystem | Kind | Disposition | Wave | Notes |\n")
        b.WriteString("|---|---:|---|---:|---|---|---|---:|---|\n")

        for i := range summaries {
                row := summaries[i]
                checkList := sortedCheckList(row.Checks)
                notes := strings.Join(uniqueSymbols(row.Findings), ", ")
                action, wave := dispositionFor(row)
                if len(notes) > maxNotes {
                        notes = notes[:maxNotes-3] + "..."
                }
                _, _ = fmt.Fprintf(b, "| `%s` | %d | %s | %d | %s | %s | %s | %d | %s |\n",
                        row.File, len(row.Findings), checkList, row.WorstComplexity,
                        subsystemFor(row.File), fileKind(row.File), action, wave, notes)
        }
}

// writeTriageWaves appends the static "Remediation waves" section to b. The
// wave numbers here are the ones dispositionFor assigns to each file.
func writeTriageWaves(b *strings.Builder) {
        b.WriteString("\n## Remediation waves\n\n")
        for _, line := range []string{
                "1. **Wave 0** — Tooling + `clang-tidy --fix` on rows dispositioned `autofix`.\n",
                "2. **Wave 1** — P0 complexity (>50): dedicated PRs with tests.\n",
                "3. **Wave 2** — P1 complexity (35–50): one PR per subsystem.\n",
                "4. **Wave 3** — Non-complexity correctness (`bugprone-*`, `clang-analyzer-*`).\n",
                "5. **Wave 4** — P2 complexity (26–34), tests/fixtures, scoped NOLINT.\n",
        } {
                b.WriteString(line)
        }
}

// sortedKeys returns the keys of m ordered by descending count, with ties
// broken alphabetically. The result is always non-nil.
func sortedKeys(m map[string]int) []string {
        names := make([]string, 0, len(m))
        for name := range m {
                names = append(names, name)
        }
        // Alphabetical first; the stable sort by count then keeps that order
        // among keys that share a count.
        sort.Strings(names)
        sort.SliceStable(names, func(a, b int) bool {
                return m[names[a]] > m[names[b]]
        })
        return names
}

// sortedCheckList renders a check->count map as "name (count)" entries joined
// by "; ", in the order produced by sortedKeys. An empty map yields "".
func sortedCheckList(m map[string]int) string {
        var list strings.Builder
        for i, name := range sortedKeys(m) {
                if i > 0 {
                        list.WriteString("; ")
                }
                _, _ = fmt.Fprintf(&list, "%s (%d)", name, m[name])
        }
        return list.String()
}

// uniqueSymbols returns the distinct labels of findings in first-seen order.
// A finding's label is its Symbol, or its Message shortened to 40 bytes when
// no symbol is recorded. The result is nil when there are no findings.
func uniqueSymbols(findings []tidyFinding) []string {
        var labels []string
        known := make(map[string]bool)
        for i := range findings {
                label := findings[i].Symbol
                if label == "" {
                        label = truncateMsg(findings[i].Message, 40)
                }
                if known[label] {
                        continue
                }
                known[label] = true
                labels = append(labels, label)
        }
        return labels
}

// truncateMsg shortens s to at most n bytes. Strings that already fit are
// returned unchanged; longer ones are cut and suffixed with "..." so that the
// result, ellipsis included, is no longer than n bytes.
//
// The cut never lands inside a multi-byte UTF-8 sequence, so a valid input
// produces a valid output (the result may then be slightly shorter than n).
// When n is too small to hold the ellipsis (n < 3) the result is a bare
// prefix of at most n bytes, and "" for n <= 0, rather than a panic.
func truncateMsg(s string, n int) string {
        if len(s) <= n {
                return s
        }
        const ellipsis = "..."
        suffix := ellipsis
        limit := n - len(ellipsis)
        if limit < 0 {
                // No room for the ellipsis: fall back to a plain prefix.
                suffix = ""
                limit = n
                if limit < 0 {
                        limit = 0
                }
        }
        // Ranging over a string yields the byte offset of every rune start;
        // keep the largest one that does not exceed the limit.
        cut := 0
        for i := range s {
                if i > limit {
                        break
                }
                cut = i
        }
        return s[:cut] + suffix
}

// printTidySummary writes a short human-readable recap of the run to w: block,
// failed-file and finding totals, the number of findings that carry a
// complexity score, a per-check breakdown ordered by sortedKeys, and the
// paths of the CSV and triage outputs. Write errors are deliberately ignored.
func printTidySummary(
        w io.Writer,
        findings []tidyFinding,
        failedFiles map[string]struct{},
        totalBlocks int,
        csvPath, mdPath string,
) {
        perCheck := make(map[string]int)
        var scored int
        for i := range findings {
                perCheck[findings[i].Check]++
                if findings[i].ComplexityScore > 0 {
                        scored++
                }
        }

        _, _ = fmt.Fprintf(w, "lint:cpp:tidy-report: scanned %d file blocks, %d failed files, %d findings\n",
                totalBlocks, len(failedFiles), len(findings))
        _, _ = fmt.Fprintf(w, "  cognitive-complexity findings: %d\n", scored)

        _, _ = fmt.Fprint(w, "  checks:\n")
        for _, name := range sortedKeys(perCheck) {
                _, _ = fmt.Fprintf(w, "    %s: %d\n", name, perCheck[name])
        }

        _, _ = fmt.Fprintf(w, "  csv: %s\n", csvPath)
        _, _ = fmt.Fprintf(w, "  triage: %s\n", mdPath)
}

package main

import (
        "encoding/json"
        "fmt"
        "io"
        "os"
        "strings"
)

// Badge rendering constants.
const (
        // missingMessage is the badge message used when no version is supplied.
        missingMessage = "n/a"
        // Badge colors chosen by badgeColor: blue for a regular release,
        // orange for a pre-release, lightgrey when the version is empty.
        colorBlue      = "blue"
        colorOrange    = "orange"
        colorLightgrey = "lightgrey"
)

// badgeJSON is the badge payload serialized by emitBadge. The JSON keys are
// fixed by the struct tags (schemaVersion, label, message, color).
type badgeJSON struct {
        SchemaVersion int    `json:"schemaVersion"` // runBadge always sets this to 1
        Label         string `json:"label"`         // left-hand badge text (-label flag)
        Message       string `json:"message"`       // right-hand badge text, see badgeMessage
        Color         string `json:"color"`         // badge color name, see badgeColor
}

// badgeMessage returns the text shown on the badge: the version with
// surrounding whitespace removed, or missingMessage when nothing is left.
func badgeMessage(version string) string {
        if trimmed := strings.TrimSpace(version); trimmed != "" {
                return trimmed
        }
        return missingMessage
}

// badgeColor picks the badge color for a version: lightgrey when the version
// is blank, orange when isPreRelease reports a pre-release, blue otherwise.
// Surrounding whitespace is ignored.
func badgeColor(version string) string {
        switch tag := strings.TrimSpace(version); {
        case tag == "":
                return colorLightgrey
        case isPreRelease(tag):
                return colorOrange
        default:
                return colorBlue
        }
}

// isPreRelease reports whether tag (with or without a leading "v") carries a
// semver pre-release identifier, i.e. a "-" before any build metadata.
//
// Build metadata (everything from the first "+") is dropped before looking
// for the hyphen, because metadata identifiers may themselves contain "-":
// "v1.0.0+build-7" is a normal release, not a pre-release.
func isPreRelease(tag string) bool {
        tag = strings.TrimPrefix(tag, "v")
        if plus := strings.IndexByte(tag, '+'); plus >= 0 {
                tag = tag[:plus]
        }
        return strings.Contains(tag, "-")
}

// runBadge implements the "badge" subcommand: it parses -version, -out and
// -label from args, builds the badge payload, and hands it to emitBadge, which
// writes to the -out path or to stdout when -out is empty. Flag parse errors
// are returned unchanged.
func runBadge(args []string, stdout, stderr io.Writer) error {
        flags := flagSet("badge", stderr)
        var (
                version = flags.String("version", "", "release tag to render (empty renders n/a)")
                out     = flags.String("out", "", "output badge JSON path (default: stdout)")
                label   = flags.String("label", "release", "badge label")
        )
        if err := flags.Parse(args); err != nil {
                return err
        }

        return emitBadge(&badgeJSON{
                SchemaVersion: 1,
                Label:         *label,
                Message:       badgeMessage(*version),
                Color:         badgeColor(*version),
        }, *out, stdout)
}

// emitBadge serializes b as two-space-indented JSON followed by a newline.
// With a non-empty outPath the bytes are written to that file (mode 0644);
// otherwise they go to stdout. Marshal and file-write failures are wrapped;
// an error from stdout.Write is returned as-is.
func emitBadge(b *badgeJSON, outPath string, stdout io.Writer) error {
        encoded, err := json.MarshalIndent(b, "", "  ")
        if err != nil {
                return fmt.Errorf("marshal badge: %w", err)
        }
        encoded = append(encoded, '\n')

        if outPath != "" {
                //nolint:gosec // 0o644 is the right mode for a CI-published JSON artifact.
                if werr := os.WriteFile(outPath, encoded, 0o644); werr != nil {
                        return fmt.Errorf("write %q: %w", outPath, werr)
                }
                return nil
        }

        _, err = stdout.Write(encoded)
        return err
}

// Command release emits shields.io endpoint JSON for the README
// release badge. The release workflow publishes badge-release.json
// so the badge does not depend on shields.io's shared GitHub API
// token pool.
package main

import (
        "errors"
        "flag"
        "fmt"
        "io"
        "os"
)

// Subcommand names dispatched by run.
const (
        cmdBadge       = "badge"
        cmdReadmeBadge = "readme-badge"
)

// errUsage is returned by run when the command line is malformed (no
// subcommand or an unknown one); main maps it to exit status 2.
var errUsage = errors.New("usage error")

// run dispatches on the first argument: "badge" and "readme-badge" invoke
// their subcommands with the remaining arguments, and the help spellings
// print usage to stdout and succeed. A missing or unknown subcommand prints
// usage to stderr and returns errUsage.
func run(args []string, stdout, stderr io.Writer) error {
        if len(args) == 0 {
                usage(stderr)
                return errUsage
        }

        sub, rest := args[0], args[1:]
        switch sub {
        case "-h", "--help", "help":
                usage(stdout)
                return nil
        case cmdBadge:
                return runBadge(rest, stdout, stderr)
        case cmdReadmeBadge:
                return runReadmeBadge(rest, stdout, stderr)
        }

        _, _ = fmt.Fprintf(stderr, "release: unknown subcommand %q\n\n", sub)
        usage(stderr)
        return errUsage
}

// usage writes the top-level help text, listing the available subcommands,
// to w. Write errors are ignored.
func usage(w io.Writer) {
        const help = `release - gh-pages badge JSON for the README release shield.

Subcommands:
  badge        Emit shields.io endpoint JSON for the latest semver release tag.
  readme-badge Patch README.md shields.io cache buster (&v=...) for the release badge.

Run "release badge -h" or "release readme-badge -h" for flags.
`
        _, _ = io.WriteString(w, help)
}

// flagSet builds the flag set for a subcommand. Parse errors are returned to
// the caller (flag.ContinueOnError) instead of exiting the process, and the
// flag package's own diagnostics and usage output go to stderr.
func flagSet(name string, stderr io.Writer) *flag.FlagSet {
        set := flag.NewFlagSet(name, flag.ContinueOnError)
        set.SetOutput(stderr)
        return set
}

// main runs the CLI and maps its result to a process exit status:
//
//	0  success, or help explicitly requested via -h/-help on a subcommand
//	2  usage error (missing or unknown subcommand)
//	1  any other failure, reported on stderr with a "release: " prefix
//
// Subcommand flag sets use flag.ContinueOnError, so "release badge -h" comes
// back from run as flag.ErrHelp after the flag package has already printed
// the flag help. That is a successful help request, not a failure, so it must
// not be reported as an error.
func main() {
        err := run(os.Args[1:], os.Stdout, os.Stderr)
        switch {
        case err == nil:
                return
        case errors.Is(err, flag.ErrHelp):
                // Help text was already written by the flag package.
                return
        case errors.Is(err, errUsage):
                os.Exit(2)
        default:
                _, _ = fmt.Fprintf(os.Stderr, "release: %v\n", err)
                os.Exit(1)
        }
}

package main

import (
        "errors"
        "fmt"
        "io"
        "os"
        "regexp"
        "strings"
)

// defaultReadmePath is the default value of the readme-badge -readme flag.
const defaultReadmePath = "README.md"

// badgeReleaseCacheBusterRe matches the shields.io endpoint URL cache-buster
// query param on the README release badge line.
//
// Capture group 1 is the literal "badge-release.json&v=" prefix; the rest of
// the match is the current value, i.e. every character up to (but excluding)
// the next ")".
var badgeReleaseCacheBusterRe = regexp.MustCompile(
        `(badge-release\.json&v=)[^)]+`,
)

// readmeBadgeCacheVersion converts a release tag into the value used for the
// README cache-buster: surrounding whitespace is removed and one leading "v"
// is dropped (" v0.3.0 " becomes "0.3.0"). A blank version is an error.
func readmeBadgeCacheVersion(version string) (string, error) {
        tag := strings.TrimSpace(version)
        if len(tag) == 0 {
                return "", errors.New("version is required")
        }
        return strings.TrimPrefix(tag, "v"), nil
}

// patchReadmeBadgeCacheBuster rewrites every release-badge cache-buster value
// in readme to cacheVersion.
//
// It returns the patched bytes and whether they differ from the input. If the
// README contains no cache-buster at all, it returns an error and a nil slice
// so a missing badge line is noticed rather than silently skipped.
//
// cacheVersion is inserted literally: "$" has a special meaning in regexp
// replacement templates, so it is escaped as "$$" before the template is
// built. Otherwise a value such as "1.0$1" would be expanded as a group
// reference instead of being written as given.
func patchReadmeBadgeCacheBuster(readme []byte, cacheVersion string) ([]byte, bool, error) {
        if !badgeReleaseCacheBusterRe.Match(readme) {
                return nil, false, errors.New("README release badge cache buster not found")
        }
        // ${1} re-emits the matched "badge-release.json&v=" prefix; the braces
        // keep a version that starts with a digit from being read as part of
        // the group number.
        literal := strings.ReplaceAll(cacheVersion, "$", "$$")
        replacement := []byte("${1}" + literal)
        out := badgeReleaseCacheBusterRe.ReplaceAll(readme, replacement)
        return out, !bytesEqual(readme, out), nil
}

// bytesEqual reports whether a and b hold the same bytes. Nil and empty
// slices compare equal.
func bytesEqual(a, b []byte) bool {
        // Comparing the string views checks length and content in one step.
        return string(a) == string(b)
}

// runReadmeBadge implements the "readme-badge" subcommand. It parses -version
// and -readme, derives the cache-buster value from the version, and patches
// the README in place. The file is rewritten only when the content actually
// changes; either way a one-line status is printed to stdout. Read and write
// failures are wrapped with the path; other errors are returned unchanged.
func runReadmeBadge(args []string, stdout, stderr io.Writer) error {
        flags := flagSet("readme-badge", stderr)
        var (
                version    = flags.String("version", "", "release tag to render (e.g. v0.3.0)")
                readmePath = flags.String("readme", defaultReadmePath, "README path to patch")
        )
        if err := flags.Parse(args); err != nil {
                return err
        }

        cacheVersion, err := readmeBadgeCacheVersion(*version)
        if err != nil {
                return err
        }

        target := *readmePath
        original, err := os.ReadFile(target)
        if err != nil {
                return fmt.Errorf("read %q: %w", target, err)
        }

        updated, changed, err := patchReadmeBadgeCacheBuster(original, cacheVersion)
        if err != nil {
                return err
        }

        if changed {
                //nolint:gosec // 0o644 is the right mode for a tracked README.
                if werr := os.WriteFile(target, updated, 0o644); werr != nil {
                        return fmt.Errorf("write %q: %w", target, werr)
                }
                _, _ = fmt.Fprintf(stdout, "readme-badge: patched %s to &v=%s\n", target, cacheVersion)
                return nil
        }

        _, _ = fmt.Fprintf(stdout, "readme-badge: %s already at &v=%s\n", target, cacheVersion)
        return nil
}