Line data Source code
1 : // Unit tests for the BigQuery string polyfill macros.
2 : //
3 : // Each test drives the macro directly against an in-process DuckDB
4 : // connection and exercises both the common path and the
5 : // BigQuery-specific edge case the wrapper exists to pin.
6 :
7 : #include <cstdint>
8 : #include <string>
9 :
10 : #include "absl/status/status.h"
11 : #include "backend/engine/duckdb/udf/registrar.h"
12 : #include "duckdb.h"
13 : #include "gtest/gtest.h"
14 :
15 : namespace bigquery_emulator {
16 : namespace backend {
17 : namespace engine {
18 : namespace duckdb {
19 : namespace udf {
20 : namespace {
21 :
22 : class StringMacrosTest : public ::testing::Test {
23 : protected:
24 8 : void SetUp() override {
25 8 : ASSERT_EQ(::duckdb_open(nullptr, &db_), ::DuckDBSuccess);
26 8 : ASSERT_EQ(::duckdb_connect(db_, &conn_), ::DuckDBSuccess);
27 8 : absl::Status reg = RegisterAll(conn_);
28 16 : ASSERT_TRUE(reg.ok()) << reg;
29 8 : }
30 :
31 8 : void TearDown() override {
32 8 : if (conn_ != nullptr) ::duckdb_disconnect(&conn_);
33 8 : if (db_ != nullptr) ::duckdb_close(&db_);
34 8 : }
35 :
36 4 : int64_t RunInt64(const std::string& sql) {
37 4 : ::duckdb_result result;
38 4 : auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
39 8 : EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: "
40 8 : << (::duckdb_result_error(&result) == nullptr
41 8 : ? "(no error)"
42 8 : : ::duckdb_result_error(&result))
43 8 : << " (sql=" << sql << ")";
44 4 : int64_t v = ::duckdb_value_int64(&result, 0, 0);
45 4 : ::duckdb_destroy_result(&result);
46 4 : return v;
47 4 : }
48 :
49 3 : bool RunIsNull(const std::string& sql) {
50 3 : ::duckdb_result result;
51 3 : auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
52 6 : EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: " << sql;
53 3 : bool v = ::duckdb_value_is_null(&result, 0, 0);
54 3 : ::duckdb_destroy_result(&result);
55 3 : return v;
56 3 : }
57 :
58 : ::duckdb_database db_ = nullptr;
59 : ::duckdb_connection conn_ = nullptr;
60 : };
61 :
62 : // --- bq_strpos ---------------------------------------------------
63 :
64 1 : TEST_F(StringMacrosTest, StrposReturnsOneBasedIndex) {
65 : // Edge case pinned: BigQuery STRPOS is 1-based. A regression
66 : // that returned a 0-based index would surface here as 2 instead
67 : // of 3.
68 1 : EXPECT_EQ(RunInt64("SELECT bq_strpos('hello', 'll')"), 3);
69 1 : EXPECT_EQ(RunInt64("SELECT bq_strpos('hello', 'h')"), 1);
70 1 : }
71 :
72 1 : TEST_F(StringMacrosTest, StrposMissingNeedleReturnsZero) {
73 : // Edge case pinned: BigQuery STRPOS returns 0 (NOT -1, NOT NULL)
74 : // when the needle is not found.
75 1 : EXPECT_EQ(RunInt64("SELECT bq_strpos('hello', 'zz')"), 0);
76 1 : }
77 :
78 1 : TEST_F(StringMacrosTest, StrposEmptyNeedle) {
79 : // Edge case pinned: BigQuery STRPOS('abc', '') returns 1
80 : // (empty substring matches at position 1).
81 1 : EXPECT_EQ(RunInt64("SELECT bq_strpos('abc', '')"), 1);
82 1 : }
83 :
84 1 : TEST_F(StringMacrosTest, StrposNullPropagation) {
85 1 : EXPECT_TRUE(RunIsNull("SELECT bq_strpos(NULL::VARCHAR, 'll')"));
86 1 : EXPECT_TRUE(RunIsNull("SELECT bq_strpos('hello', NULL::VARCHAR)"));
87 1 : EXPECT_TRUE(RunIsNull("SELECT bq_strpos(NULL::VARCHAR, NULL::VARCHAR)"));
88 1 : }
89 :
90 : // --- bq_split ----------------------------------------------------
91 :
92 : // Helper for LIST-returning macros: wraps the macro call in
93 : // `list_aggregate(<call>, 'string_agg', '|')` so the result is a
94 : // single VARCHAR with the list elements joined by `|`. The
95 : // per-test assertions compare that joined form (easier to eyeball
96 : // than a manually-walked column buffer; the macro's contract is
97 : // fully captured by the ordered set of elements).
98 : //
99 : // Note: BigQuery SPLIT's return type is LIST(VARCHAR); DuckDB's
100 : // `string_split` returns the same shape, and DuckDB's
101 : // `array_to_string(list, sep)` flattens it. We use `'|'` as the
102 : // join separator so test fixtures that include `,` in the data
103 : // stay readable.
104 5 : std::string JoinSplit(::duckdb_connection conn, const std::string& macro_call) {
105 5 : ::duckdb_result result;
106 5 : const std::string wrapped = "SELECT array_to_string(" + macro_call + ", '|')";
107 5 : auto rc = ::duckdb_query(conn, wrapped.c_str(), &result);
108 5 : if (rc != ::DuckDBSuccess) {
109 0 : ADD_FAILURE() << "DuckDB rejected: "
110 0 : << (::duckdb_result_error(&result) == nullptr
111 0 : ? "(no error)"
112 0 : : ::duckdb_result_error(&result))
113 0 : << " (sql=" << wrapped << ")";
114 0 : ::duckdb_destroy_result(&result);
115 0 : return "(rejected)";
116 0 : }
117 5 : char* raw = ::duckdb_value_varchar(&result, 0, 0);
118 5 : std::string out = raw == nullptr ? std::string("") : std::string(raw);
119 5 : ::duckdb_free(raw);
120 5 : ::duckdb_destroy_result(&result);
121 5 : return out;
122 5 : }
123 :
124 1 : TEST_F(StringMacrosTest, SplitDefaultDelimiterIsComma) {
125 : // Edge case pinned: BigQuery SPLIT(value) (single-arg form)
126 : // defaults to splitting on `,`. A regression that registered
127 : // the macro without a `delimiter := ','` default would surface
128 : // here as a binder error.
129 1 : EXPECT_EQ(JoinSplit(conn_, "bq_split('a,b,c')"), "a|b|c");
130 1 : EXPECT_EQ(JoinSplit(conn_, "bq_split('one,two')"), "one|two");
131 1 : }
132 :
133 1 : TEST_F(StringMacrosTest, SplitCustomDelimiter) {
134 1 : EXPECT_EQ(JoinSplit(conn_, "bq_split('a;b;c', ';')"), "a|b|c");
135 1 : EXPECT_EQ(JoinSplit(conn_, "bq_split('foo-bar-baz', '-')"), "foo|bar|baz");
136 1 : }
137 :
138 1 : TEST_F(StringMacrosTest, SplitEmptyInputReturnsSingleEmpty) {
139 : // Edge case pinned: BigQuery SPLIT('', ',') returns a list
140 : // containing one empty string (BQ contract: a non-empty
141 : // delimiter always splits, even an empty input). DuckDB's
142 : // string_split agrees today; the test pins it.
143 1 : EXPECT_EQ(JoinSplit(conn_, "bq_split('', ',')"), "");
144 1 : }
145 :
146 1 : TEST_F(StringMacrosTest, SplitNullPropagation) {
147 : // Wrap in a `r IS NULL` SQL boolean rather than relying on
148 : // `duckdb_value_is_null` for LIST cells; the C API's NULL
149 : // detector for LIST columns is shape-sensitive across DuckDB
150 : // versions, and a SQL-level `IS NULL` is unambiguous.
151 1 : ::duckdb_result result;
152 1 : for (const char* sql : {
153 1 : "SELECT bq_split(NULL::VARCHAR) IS NULL",
154 1 : "SELECT bq_split('a,b,c', NULL::VARCHAR) IS NULL",
155 2 : }) {
156 2 : auto rc = ::duckdb_query(conn_, sql, &result);
157 4 : ASSERT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: " << sql;
158 4 : EXPECT_TRUE(::duckdb_value_boolean(&result, 0, 0))
159 4 : << "expected NULL propagation for " << sql;
160 2 : ::duckdb_destroy_result(&result);
161 2 : }
162 1 : }
163 :
164 : } // namespace
165 : } // namespace udf
166 : } // namespace duckdb
167 : } // namespace engine
168 : } // namespace backend
169 : } // namespace bigquery_emulator
|