Line data Source code
1 : // Unit tests for the BigQuery regex polyfill macros.
2 : //
3 : // Each test drives the macro directly against an in-process DuckDB
4 : // connection and exercises both the common path and the
5 : // BigQuery-specific edge case the wrapper exists to pin.
6 :
7 : #include <string>
8 :
9 : #include "absl/status/status.h"
10 : #include "backend/engine/duckdb/udf/registrar.h"
11 : #include "duckdb.h"
12 : #include "gtest/gtest.h"
13 :
14 : namespace bigquery_emulator {
15 : namespace backend {
16 : namespace engine {
17 : namespace duckdb {
18 : namespace udf {
19 : namespace {
20 :
21 : class RegexMacrosTest : public ::testing::Test {
22 : protected:
23 9 : void SetUp() override {
24 9 : ASSERT_EQ(::duckdb_open(nullptr, &db_), ::DuckDBSuccess);
25 9 : ASSERT_EQ(::duckdb_connect(db_, &conn_), ::DuckDBSuccess);
26 9 : absl::Status reg = RegisterAll(conn_);
27 18 : ASSERT_TRUE(reg.ok()) << reg;
28 9 : }
29 :
30 9 : void TearDown() override {
31 9 : if (conn_ != nullptr) ::duckdb_disconnect(&conn_);
32 9 : if (db_ != nullptr) ::duckdb_close(&db_);
33 9 : }
34 :
35 9 : bool RunBool(const std::string& sql) {
36 9 : ::duckdb_result result;
37 9 : auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
38 18 : EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: "
39 18 : << (::duckdb_result_error(&result) == nullptr
40 18 : ? "(no error)"
41 18 : : ::duckdb_result_error(&result))
42 18 : << " (sql=" << sql << ")";
43 9 : bool v = ::duckdb_value_boolean(&result, 0, 0);
44 9 : ::duckdb_destroy_result(&result);
45 9 : return v;
46 9 : }
47 :
48 4 : std::string RunString(const std::string& sql) {
49 4 : ::duckdb_result result;
50 4 : auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
51 8 : EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: "
52 8 : << (::duckdb_result_error(&result) == nullptr
53 8 : ? "(no error)"
54 8 : : ::duckdb_result_error(&result))
55 8 : << " (sql=" << sql << ")";
56 4 : char* raw = ::duckdb_value_varchar(&result, 0, 0);
57 4 : std::string out = raw == nullptr ? std::string("") : std::string(raw);
58 4 : ::duckdb_free(raw);
59 4 : ::duckdb_destroy_result(&result);
60 4 : return out;
61 4 : }
62 :
63 5 : bool RunIsNull(const std::string& sql) {
64 5 : ::duckdb_result result;
65 5 : auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
66 10 : EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: " << sql;
67 5 : bool v = ::duckdb_value_is_null(&result, 0, 0);
68 5 : ::duckdb_destroy_result(&result);
69 5 : return v;
70 5 : }
71 :
72 : ::duckdb_database db_ = nullptr;
73 : ::duckdb_connection conn_ = nullptr;
74 : };
75 :
76 : // --- bq_regexp_contains ------------------------------------------
77 :
78 1 : TEST_F(RegexMacrosTest, RegexpContainsCommonPath) {
79 1 : EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('abc', 'b')"));
80 1 : EXPECT_FALSE(RunBool("SELECT bq_regexp_contains('abc', 'd')"));
81 1 : }
82 :
83 1 : TEST_F(RegexMacrosTest, RegexpContainsAnchoredMatch) {
84 : // Edge case pinned: `^` anchors to the start. A regression that
85 : // wrapped the regex with `^...$` (full-string match) would
86 : // break the contains-anywhere contract.
87 1 : EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('abc', '^a')"));
88 1 : EXPECT_FALSE(RunBool("SELECT bq_regexp_contains('abc', '^b')"));
89 1 : EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('abc', 'c$')"));
90 1 : }
91 :
92 1 : TEST_F(RegexMacrosTest, RegexpContainsCaseSensitiveByDefault) {
93 : // Edge case pinned: BigQuery RE2 is case-sensitive unless the
94 : // pattern includes an `(?i)` inline flag. A regression that
95 : // implicitly case-folded would surface here.
96 1 : EXPECT_FALSE(RunBool("SELECT bq_regexp_contains('ABC', 'abc')"));
97 1 : EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('ABC', 'ABC')"));
98 1 : }
99 :
100 1 : TEST_F(RegexMacrosTest, RegexpContainsHonorsInlineFlags) {
101 : // The `(?i)` inline flag is the canonical BQ way to do
102 : // case-insensitive regex (BQ does not expose a separate flags
103 : // parameter on REGEXP_CONTAINS). Verifying it round-trips
104 : // through the DuckDB RE2 binding pins the dialect compatibility.
105 1 : EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('ABC', '(?i)abc')"));
106 1 : EXPECT_TRUE(
107 1 : RunBool("SELECT bq_regexp_contains('hello\nworld', '(?s)hello.world')"));
108 1 : }
109 :
110 1 : TEST_F(RegexMacrosTest, RegexpContainsNullPropagation) {
111 1 : EXPECT_TRUE(RunIsNull("SELECT bq_regexp_contains(NULL::VARCHAR, 'a')"));
112 1 : EXPECT_TRUE(RunIsNull("SELECT bq_regexp_contains('abc', NULL::VARCHAR)"));
113 1 : }
114 :
115 : // --- bq_regexp_replace -------------------------------------------
116 :
117 1 : TEST_F(RegexMacrosTest, RegexpReplaceIsGlobal) {
118 : // Edge case pinned: BigQuery REGEXP_REPLACE replaces ALL
119 : // matches; DuckDB's `regexp_replace` defaults to only the
120 : // first. A regression that dropped the `'g'` flag would
121 : // surface here as 'baaa' instead of 'bbbb'.
122 1 : EXPECT_EQ(RunString("SELECT bq_regexp_replace('aaaa', 'a', 'b')"), "bbbb");
123 1 : EXPECT_EQ(RunString("SELECT bq_regexp_replace('foo bar foo', 'foo', 'baz')"),
124 1 : "baz bar baz");
125 1 : }
126 :
127 1 : TEST_F(RegexMacrosTest, RegexpReplaceNoMatchReturnsInput) {
128 1 : EXPECT_EQ(RunString("SELECT bq_regexp_replace('abc', 'xyz', 'q')"), "abc");
129 1 : }
130 :
131 1 : TEST_F(RegexMacrosTest, RegexpReplaceHonorsBackreferences) {
132 : // BigQuery and DuckDB both support `\1`, `\2`, ... backrefs in
133 : // the replacement string. Swapping two captures is the
134 : // canonical pin: `'(\w+) (\w+)'` -> `'\2 \1'`.
135 1 : EXPECT_EQ(RunString("SELECT bq_regexp_replace('John Doe', '(\\w+) (\\w+)', "
136 1 : "'\\2 \\1')"),
137 1 : "Doe John");
138 1 : }
139 :
140 1 : TEST_F(RegexMacrosTest, RegexpReplaceNullPropagation) {
141 1 : EXPECT_TRUE(RunIsNull("SELECT bq_regexp_replace(NULL::VARCHAR, 'a', 'b')"));
142 1 : EXPECT_TRUE(RunIsNull("SELECT bq_regexp_replace('abc', NULL::VARCHAR, 'b')"));
143 1 : EXPECT_TRUE(RunIsNull("SELECT bq_regexp_replace('abc', 'a', NULL::VARCHAR)"));
144 1 : }
145 :
146 : } // namespace
147 : } // namespace udf
148 : } // namespace duckdb
149 : } // namespace engine
150 : } // namespace backend
151 : } // namespace bigquery_emulator
|