LCOV - code coverage report
Current view: top level - backend/engine/duckdb/udf/regex - regex_macros_test.cc (source / functions) Coverage Total Hit
Test: _coverage_report.dat Lines: 100.0 % 84 84
Test Date: 2026-07-02 21:01:18 Functions: 100.0 % 14 14

            Line data    Source code
       1              : // Unit tests for the BigQuery regex polyfill macros.
       2              : //
       3              : // Each test drives the macro directly against an in-process DuckDB
       4              : // connection and exercises both the common path and the
       5              : // BigQuery-specific edge case the wrapper exists to pin.
       6              : 
       7              : #include <string>
       8              : 
       9              : #include "absl/status/status.h"
      10              : #include "backend/engine/duckdb/udf/registrar.h"
      11              : #include "duckdb.h"
      12              : #include "gtest/gtest.h"
      13              : 
      14              : namespace bigquery_emulator {
      15              : namespace backend {
      16              : namespace engine {
      17              : namespace duckdb {
      18              : namespace udf {
      19              : namespace {
      20              : 
      21              : class RegexMacrosTest : public ::testing::Test {
      22              :  protected:
      23            9 :   void SetUp() override {
      24            9 :     ASSERT_EQ(::duckdb_open(nullptr, &db_), ::DuckDBSuccess);
      25            9 :     ASSERT_EQ(::duckdb_connect(db_, &conn_), ::DuckDBSuccess);
      26            9 :     absl::Status reg = RegisterAll(conn_);
      27           18 :     ASSERT_TRUE(reg.ok()) << reg;
      28            9 :   }
      29              : 
      30            9 :   void TearDown() override {
      31            9 :     if (conn_ != nullptr) ::duckdb_disconnect(&conn_);
      32            9 :     if (db_ != nullptr) ::duckdb_close(&db_);
      33            9 :   }
      34              : 
      35            9 :   bool RunBool(const std::string& sql) {
      36            9 :     ::duckdb_result result;
      37            9 :     auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
      38           18 :     EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: "
      39           18 :                                    << (::duckdb_result_error(&result) == nullptr
      40           18 :                                            ? "(no error)"
      41           18 :                                            : ::duckdb_result_error(&result))
      42           18 :                                    << " (sql=" << sql << ")";
      43            9 :     bool v = ::duckdb_value_boolean(&result, 0, 0);
      44            9 :     ::duckdb_destroy_result(&result);
      45            9 :     return v;
      46            9 :   }
      47              : 
      48            4 :   std::string RunString(const std::string& sql) {
      49            4 :     ::duckdb_result result;
      50            4 :     auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
      51            8 :     EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: "
      52            8 :                                    << (::duckdb_result_error(&result) == nullptr
      53            8 :                                            ? "(no error)"
      54            8 :                                            : ::duckdb_result_error(&result))
      55            8 :                                    << " (sql=" << sql << ")";
      56            4 :     char* raw = ::duckdb_value_varchar(&result, 0, 0);
      57            4 :     std::string out = raw == nullptr ? std::string("") : std::string(raw);
      58            4 :     ::duckdb_free(raw);
      59            4 :     ::duckdb_destroy_result(&result);
      60            4 :     return out;
      61            4 :   }
      62              : 
      63            5 :   bool RunIsNull(const std::string& sql) {
      64            5 :     ::duckdb_result result;
      65            5 :     auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
      66           10 :     EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: " << sql;
      67            5 :     bool v = ::duckdb_value_is_null(&result, 0, 0);
      68            5 :     ::duckdb_destroy_result(&result);
      69            5 :     return v;
      70            5 :   }
      71              : 
      72              :   ::duckdb_database db_ = nullptr;
      73              :   ::duckdb_connection conn_ = nullptr;
      74              : };
      75              : 
      76              : // --- bq_regexp_contains ------------------------------------------
      77              : 
      78            1 : TEST_F(RegexMacrosTest, RegexpContainsCommonPath) {
      79            1 :   EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('abc', 'b')"));
      80            1 :   EXPECT_FALSE(RunBool("SELECT bq_regexp_contains('abc', 'd')"));
      81            1 : }
      82              : 
      83            1 : TEST_F(RegexMacrosTest, RegexpContainsAnchoredMatch) {
      84              :   // Edge case pinned: `^` anchors to the start. A regression that
      85              :   // wrapped the regex with `^...$` (full-string match) would
      86              :   // break the contains-anywhere contract.
      87            1 :   EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('abc', '^a')"));
      88            1 :   EXPECT_FALSE(RunBool("SELECT bq_regexp_contains('abc', '^b')"));
      89            1 :   EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('abc', 'c$')"));
      90            1 : }
      91              : 
      92            1 : TEST_F(RegexMacrosTest, RegexpContainsCaseSensitiveByDefault) {
      93              :   // Edge case pinned: BigQuery RE2 is case-sensitive unless the
      94              :   // pattern includes an `(?i)` inline flag. A regression that
      95              :   // implicitly case-folded would surface here.
      96            1 :   EXPECT_FALSE(RunBool("SELECT bq_regexp_contains('ABC', 'abc')"));
      97            1 :   EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('ABC', 'ABC')"));
      98            1 : }
      99              : 
     100            1 : TEST_F(RegexMacrosTest, RegexpContainsHonorsInlineFlags) {
     101              :   // The `(?i)` inline flag is the canonical BQ way to do
     102              :   // case-insensitive regex (BQ does not expose a separate flags
     103              :   // parameter on REGEXP_CONTAINS). Verifying it round-trips
     104              :   // through the DuckDB RE2 binding pins the dialect compatibility.
     105            1 :   EXPECT_TRUE(RunBool("SELECT bq_regexp_contains('ABC', '(?i)abc')"));
     106            1 :   EXPECT_TRUE(
     107            1 :       RunBool("SELECT bq_regexp_contains('hello\nworld', '(?s)hello.world')"));
     108            1 : }
     109              : 
     110            1 : TEST_F(RegexMacrosTest, RegexpContainsNullPropagation) {
     111            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_regexp_contains(NULL::VARCHAR, 'a')"));
     112            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_regexp_contains('abc', NULL::VARCHAR)"));
     113            1 : }
     114              : 
     115              : // --- bq_regexp_replace -------------------------------------------
     116              : 
     117            1 : TEST_F(RegexMacrosTest, RegexpReplaceIsGlobal) {
     118              :   // Edge case pinned: BigQuery REGEXP_REPLACE replaces ALL
     119              :   // matches; DuckDB's `regexp_replace` defaults to only the
     120              :   // first. A regression that dropped the `'g'` flag would
     121              :   // surface here as 'baaa' instead of 'bbbb'.
     122            1 :   EXPECT_EQ(RunString("SELECT bq_regexp_replace('aaaa', 'a', 'b')"), "bbbb");
     123            1 :   EXPECT_EQ(RunString("SELECT bq_regexp_replace('foo bar foo', 'foo', 'baz')"),
     124            1 :             "baz bar baz");
     125            1 : }
     126              : 
     127            1 : TEST_F(RegexMacrosTest, RegexpReplaceNoMatchReturnsInput) {
     128            1 :   EXPECT_EQ(RunString("SELECT bq_regexp_replace('abc', 'xyz', 'q')"), "abc");
     129            1 : }
     130              : 
     131            1 : TEST_F(RegexMacrosTest, RegexpReplaceHonorsBackreferences) {
     132              :   // BigQuery and DuckDB both support `\1`, `\2`, ... backrefs in
     133              :   // the replacement string. Swapping two captures is the
     134              :   // canonical pin: `'(\w+) (\w+)'` -> `'\2 \1'`.
     135            1 :   EXPECT_EQ(RunString("SELECT bq_regexp_replace('John Doe', '(\\w+) (\\w+)', "
     136            1 :                       "'\\2 \\1')"),
     137            1 :             "Doe John");
     138            1 : }
     139              : 
     140            1 : TEST_F(RegexMacrosTest, RegexpReplaceNullPropagation) {
     141            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_regexp_replace(NULL::VARCHAR, 'a', 'b')"));
     142            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_regexp_replace('abc', NULL::VARCHAR, 'b')"));
     143            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_regexp_replace('abc', 'a', NULL::VARCHAR)"));
     144            1 : }
     145              : 
     146              : }  // namespace
     147              : }  // namespace udf
     148              : }  // namespace duckdb
     149              : }  // namespace engine
     150              : }  // namespace backend
     151              : }  // namespace bigquery_emulator
        

Generated by: LCOV version 2.0-1