Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
2 :
3 : namespace bigquery_emulator {
4 : namespace backend {
5 : namespace engine {
6 : namespace duckdb {
7 : namespace transpiler {
8 :
9 : // --- Functions disposition table ----------------------------------------
10 :
11 1 : TEST(FunctionsTableTest, LookupKnownMappedScalar) {
12 : // Sanity check on a representative `kDuckdbNative` entry. Lookup is
13 : // case-insensitive: `abs` and `ABS` must yield the same disposition and
14 : // `duckdb_name`. (Mixed case such as `Abs` is not probed here.)
15 1 : const FnEntry* e = LookupFunction("abs");
16 1 : ASSERT_NE(e, nullptr);
17 1 : EXPECT_EQ(e->disposition, Disposition::kDuckdbNative);
18 1 : EXPECT_EQ(e->duckdb_name, "ABS");
19 1 : EXPECT_FALSE(e->planned);
20 1 : const FnEntry* upper = LookupFunction("ABS");
21 1 : ASSERT_NE(upper, nullptr);
22 1 : EXPECT_EQ(upper->disposition, Disposition::kDuckdbNative);
23 1 : EXPECT_EQ(upper->duckdb_name, "ABS");
24 1 : }
25 :
26 1 : TEST(FunctionsTableTest, LookupKnownAggregate) {
27 : // `array_agg` / `string_agg` are `duckdb_rewrite` because ORDER BY / LIMIT modifiers
28 : // lower to list()/list_slice() in the aggregate emit; plain `sum` stays `kDuckdbNative`.
29 1 : const FnEntry* agg = LookupFunction("array_agg");
30 1 : ASSERT_NE(agg, nullptr);
31 1 : EXPECT_EQ(agg->disposition, Disposition::kDuckdbRewrite);
32 1 : EXPECT_EQ(agg->duckdb_name, "ARRAY_AGG");
33 1 : const FnEntry* string_agg = LookupFunction("string_agg");
34 1 : ASSERT_NE(string_agg, nullptr);
35 1 : EXPECT_EQ(string_agg->disposition, Disposition::kDuckdbRewrite);
36 1 : EXPECT_EQ(string_agg->duckdb_name, "STRING_AGG");
37 1 : const FnEntry* sum = LookupFunction("sum");
38 1 : ASSERT_NE(sum, nullptr);
39 1 : EXPECT_EQ(sum->disposition, Disposition::kDuckdbNative);
40 1 : EXPECT_EQ(sum->duckdb_name, "SUM");
41 1 : }
42 :
43 1 : TEST(FunctionsTableTest, LookupUnsupportedFunction) {
44 : // Despite the test name, the row asserted here is `kSemanticExecutor`, not an
45 : // `unsupported` disposition: the lookup succeeds, `duckdb_name` is empty, and
46 : // `planned` is false. NOTE(review): presumably callers still short-circuit to ""
47 : // so the engine surfaces UNIMPLEMENTED (owning plan: specialised feature policy) -- confirm.
48 1 : const FnEntry* e = LookupFunction("approx_quantiles");
49 1 : ASSERT_NE(e, nullptr);
50 1 : EXPECT_EQ(e->disposition, Disposition::kSemanticExecutor);
51 1 : EXPECT_TRUE(e->duckdb_name.empty());
52 1 : EXPECT_FALSE(e->planned);
53 1 : }
54 :
55 : // `LookupPlannedDuckdbUdfFunction` was deleted in the
56 : // polyfill UDF library plan's wrap-up commit. Every former
57 : // `status=planned duckdb_udf` row that pointed at the polyfill plan
58 : // was either flipped to a ready `duckdb_udf` / `duckdb_native` row or
59 : // re-pointed to `status=planned semantic_executor`, per the
60 : // plan's "no silent approximation" rule. The reverse-direction
61 : // invariant ("no `status=planned duckdb_udf` row points at the
62 : // polyfill plan anymore") is enforced by the YAML genrule + the
63 : // `CoverageMeetsPlanThreshold` test below, so adding a
64 : // per-function planned-shape probe here would only duplicate that.
65 :
66 1 : TEST(FunctionsTableTest, LookupReadyDuckdbUdfFunction) {
67 : // Ready `duckdb_udf` rows store the registered macro name in
68 : // `duckdb_name=`; the transpiler emits the call identically to a
69 : // `duckdb_native` row. `mod` and `div` flipped from `status=planned`
70 : // to ready in the polyfill UDF library's numeric-family commit, so both
71 : // must report `kDuckdbUdf`, `bq_mod` / `bq_div`, and `planned == false`.
72 1 : const FnEntry* mod = LookupFunction("mod");
73 1 : ASSERT_NE(mod, nullptr);
74 1 : EXPECT_EQ(mod->disposition, Disposition::kDuckdbUdf);
75 1 : EXPECT_EQ(mod->duckdb_name, "bq_mod");
76 1 : EXPECT_FALSE(mod->planned);
77 1 : const FnEntry* div = LookupFunction("div");
78 1 : ASSERT_NE(div, nullptr);
79 1 : EXPECT_EQ(div->disposition, Disposition::kDuckdbUdf);
80 1 : EXPECT_EQ(div->duckdb_name, "bq_div");
81 1 : EXPECT_FALSE(div->planned);
82 1 : }
83 :
84 1 : TEST(FunctionsTableTest, LookupPlannedSemanticExecutorFunction) {
85 : // SAFE-family rows route to the semantic executor (BigQuery-exact
86 : // semantics differ from DuckDB's raise-on-overflow), so `safe_divide` has
87 : // an empty `duckdb_name`. Runtime stays UNIMPLEMENTED until `docs/ENGINE_POLICY.md`
88 : // lands. NOTE(review): test name says "Planned" but `planned` is asserted false -- confirm.
89 1 : const FnEntry* e = LookupFunction("safe_divide");
90 1 : ASSERT_NE(e, nullptr);
91 1 : EXPECT_EQ(e->disposition, Disposition::kSemanticExecutor);
92 1 : EXPECT_TRUE(e->duckdb_name.empty());
93 1 : EXPECT_FALSE(e->planned);
94 1 : }
95 :
96 1 : TEST(FunctionsTableTest, LookupUnknownReturnsNull) {
97 : // A name with no row in the YAML disposition table returns nullptr. The
98 : // transpiler treats nullptr the same as a planned-but-not-implemented
99 : // entry, but keeping the two cases distinct lets its LOG(INFO) tell a
100 : // "configured planned route" apart from "no disposition row".
101 1 : EXPECT_EQ(LookupFunction("totally_made_up_function"), nullptr);
102 1 : }
103 :
104 1 : TEST(FunctionsTableTest, CoverageMeetsPlanThreshold) {
105 : // The plan requires the disposition table to cover at least 50
106 : // BigQuery functions across the math / string / datetime /
107 : // conditional / array / aggregation / window / unsupported-family
108 : // categories. We spot-check sentinel entries from each category here
109 : // rather than hard-counting the size of the underlying map (which
110 : // is private to `functions.cc`) -- a regression in the YAML would
111 : // surface as one of these sentinel lookups returning nullptr.
112 1 : const std::vector<std::string> required = {
113 : // math
114 1 : "abs",
115 1 : "ceil",
116 1 : "floor",
117 1 : "round",
118 1 : "trunc",
119 1 : "sqrt",
120 1 : "exp",
121 1 : "sign",
122 1 : "greatest",
123 1 : "least",
124 1 : "pi",
125 1 : "ln",
126 1 : "pow",
127 : // string
128 1 : "concat",
129 1 : "length",
130 1 : "lower",
131 1 : "upper",
132 1 : "substr",
133 1 : "replace",
134 1 : "trim",
135 1 : "ltrim",
136 1 : "rtrim",
137 1 : "lpad",
138 1 : "rpad",
139 1 : "reverse",
140 1 : "starts_with",
141 1 : "ends_with",
142 : // datetime (fallback)
143 1 : "current_timestamp",
144 1 : "current_date",
145 1 : "date_add",
146 1 : "format_timestamp",
147 : // conditional
148 1 : "ifnull",
149 1 : "coalesce",
150 1 : "nullif",
151 : // array
152 1 : "array_length",
153 1 : "array_concat",
154 1 : "generate_array",
155 : // aggregation
156 1 : "count",
157 1 : "sum",
158 1 : "avg",
159 1 : "min",
160 1 : "max",
161 1 : "any_value",
162 1 : "array_agg",
163 1 : "string_agg",
164 : // local_stub families (specialized-feature-policy). NOTE(review): `approx_quantiles` is asserted `kSemanticExecutor` elsewhere in this file -- label may be stale.
165 1 : "approx_quantiles",
166 1 : "ml.predict",
167 1 : "keys.new_keyset",
168 1 : "net.ip_from_string",
169 : // window
170 1 : "row_number",
171 1 : "rank",
172 1 : "dense_rank",
173 1 : };
174 52 : for (const auto& name : required) {
175 104 : EXPECT_NE(LookupFunction(name), nullptr) << "missing entry: " << name;
176 52 : }
177 1 : EXPECT_GE(required.size(), 50u);  // Guards the sentinel list's length, not the table's size.
178 1 : }
179 :
180 : } // namespace transpiler
181 : } // namespace duckdb
182 : } // namespace engine
183 : } // namespace backend
184 : } // namespace bigquery_emulator
|