LCOV - code coverage report
Current view: top level - backend/engine/duckdb/udf/string - string_macros_test.cc (source / functions) Coverage Total Hit
Test: _coverage_report.dat Lines: 90.9 % 88 80
Test Date: 2026-07-02 21:01:18 Functions: 100.0 % 13 13

            Line data    Source code
       1              : // Unit tests for the BigQuery string polyfill macros.
       2              : //
       3              : // Each test drives the macro directly against an in-process DuckDB
       4              : // connection and exercises both the common path and the
       5              : // BigQuery-specific edge case the wrapper exists to pin.
       6              : 
       7              : #include <cstdint>
       8              : #include <string>
       9              : 
      10              : #include "absl/status/status.h"
      11              : #include "backend/engine/duckdb/udf/registrar.h"
      12              : #include "duckdb.h"
      13              : #include "gtest/gtest.h"
      14              : 
      15              : namespace bigquery_emulator {
      16              : namespace backend {
      17              : namespace engine {
      18              : namespace duckdb {
      19              : namespace udf {
      20              : namespace {
      21              : 
      22              : class StringMacrosTest : public ::testing::Test {  // Fixture: one DuckDB database + connection per test, with RegisterAll() run on it.
      23              :  protected:
      24            8 :   void SetUp() override {  // Open (null path), connect, RegisterAll; each step is a fatal assertion.
      25            8 :     ASSERT_EQ(::duckdb_open(nullptr, &db_), ::DuckDBSuccess);
      26            8 :     ASSERT_EQ(::duckdb_connect(db_, &conn_), ::DuckDBSuccess);
      27            8 :     absl::Status reg = RegisterAll(conn_);
      28           16 :     ASSERT_TRUE(reg.ok()) << reg;
      29            8 :   }
      30              :   // Releases only the handles that are non-null, connection first, then database.
      31            8 :   void TearDown() override {
      32            8 :     if (conn_ != nullptr) ::duckdb_disconnect(&conn_);
      33            8 :     if (db_ != nullptr) ::duckdb_close(&db_);
      34            8 :   }
      35              :   // Runs `sql` and returns cell (0, 0) as int64. A rejected query is a non-fatal failure, so the cell is still read.
      36            4 :   int64_t RunInt64(const std::string& sql) {
      37            4 :     ::duckdb_result result;
      38            4 :     auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
      39            8 :     EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: "
      40            8 :                                    << (::duckdb_result_error(&result) == nullptr
      41            8 :                                            ? "(no error)"
      42            8 :                                            : ::duckdb_result_error(&result))
      43            8 :                                    << " (sql=" << sql << ")";
      44            4 :     int64_t v = ::duckdb_value_int64(&result, 0, 0);  // NOTE(review): presumably yields 0 for a NULL or failed result — confirm.
      45            4 :     ::duckdb_destroy_result(&result);
      46            4 :     return v;
      47            4 :   }
      48              :   // Runs `sql` and reports whether cell (0, 0) is NULL. A rejected query is a non-fatal failure (no DuckDB error text is attached).
      49            3 :   bool RunIsNull(const std::string& sql) {
      50            3 :     ::duckdb_result result;
      51            3 :     auto rc = ::duckdb_query(conn_, sql.c_str(), &result);
      52            6 :     EXPECT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: " << sql;
      53            3 :     bool v = ::duckdb_value_is_null(&result, 0, 0);
      54            3 :     ::duckdb_destroy_result(&result);
      55            3 :     return v;
      56            3 :   }
      57              :   // Handles start out null so TearDown can skip whatever SetUp never acquired.
      58              :   ::duckdb_database db_ = nullptr;
      59              :   ::duckdb_connection conn_ = nullptr;
      60              : };
      61              : 
      62              : // --- bq_strpos ---------------------------------------------------
      63              : 
      64            1 : TEST_F(StringMacrosTest, StrposReturnsOneBasedIndex) {
      65              :   // Edge case pinned: BigQuery STRPOS is 1-based. A regression
      66              :   // that returned a 0-based index would surface here as 2 instead
      67              :   // of 3 (and as 0 instead of 1 for the leading 'h').
      68            1 :   EXPECT_EQ(RunInt64("SELECT bq_strpos('hello', 'll')"), 3);
      69            1 :   EXPECT_EQ(RunInt64("SELECT bq_strpos('hello', 'h')"), 1);
      70            1 : }
      71              : 
      72            1 : TEST_F(StringMacrosTest, StrposMissingNeedleReturnsZero) {
      73              :   // Edge case pinned: BigQuery STRPOS returns 0 (NOT -1, NOT NULL)
      74              :   // when the needle is not found.
      75            1 :   EXPECT_EQ(RunInt64("SELECT bq_strpos('hello', 'zz')"), 0);  // NOTE(review): a NULL cell may also read as 0 through duckdb_value_int64, so "NOT NULL" is not actually ruled out here — confirm.
      76            1 : }
      77              : 
      78            1 : TEST_F(StringMacrosTest, StrposEmptyNeedle) {
      79              :   // Edge case pinned: BigQuery STRPOS('abc', '') returns 1 (empty
      80              :   // substring matches at position 1). Only a non-empty haystack is exercised.
      81            1 :   EXPECT_EQ(RunInt64("SELECT bq_strpos('abc', '')"), 1);
      82            1 : }
      83              : 
      84            1 : TEST_F(StringMacrosTest, StrposNullPropagation) {  // A NULL haystack, a NULL needle, or both must produce a NULL result.
      85            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_strpos(NULL::VARCHAR, 'll')"));
      86            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_strpos('hello', NULL::VARCHAR)"));
      87            1 :   EXPECT_TRUE(RunIsNull("SELECT bq_strpos(NULL::VARCHAR, NULL::VARCHAR)"));
      88            1 : }
      89              : 
      90              : // --- bq_split ----------------------------------------------------
      91              : 
      92              : // Helper for LIST-returning macros: wraps the macro call in
      93              : // `array_to_string(<call>, '|')` so the result is a single
      94              : // VARCHAR with the list elements joined by `|`. The per-test
      95              : // assertions compare that joined form (easier to eyeball than a
      96              : // manually-walked column buffer). Returns "(rejected)" after
      97              : // recording a failure if the query fails; a null cell yields "".
      98              : //
      99              : // Note: BigQuery SPLIT returns ARRAY<STRING>; the macro's result
     100              : // is expected to be a DuckDB list of VARCHAR, which DuckDB's
     101              : // `array_to_string(list, sep)` flattens. We use `'|'` as the
     102              : // join separator so test fixtures that include `,` in the data
     103              : // stay readable.
     104            5 : std::string JoinSplit(::duckdb_connection conn, const std::string& macro_call) {
     105            5 :   ::duckdb_result result;
     106            5 :   const std::string wrapped = "SELECT array_to_string(" + macro_call + ", '|')";
     107            5 :   auto rc = ::duckdb_query(conn, wrapped.c_str(), &result);
     108            5 :   if (rc != ::DuckDBSuccess) {  // Failure path: report, release the result, return a sentinel no test expects.
     109            0 :     ADD_FAILURE() << "DuckDB rejected: "
     110            0 :                   << (::duckdb_result_error(&result) == nullptr
     111            0 :                           ? "(no error)"
     112            0 :                           : ::duckdb_result_error(&result))
     113            0 :                   << " (sql=" << wrapped << ")";
     114            0 :     ::duckdb_destroy_result(&result);
     115            0 :     return "(rejected)";
     116            0 :   }
     117            5 :   char* raw = ::duckdb_value_varchar(&result, 0, 0);  // Buffer is handed to duckdb_free below after being copied.
     118            5 :   std::string out = raw == nullptr ? std::string("") : std::string(raw);
     119            5 :   ::duckdb_free(raw);
     120            5 :   ::duckdb_destroy_result(&result);
     121            5 :   return out;
     122            5 : }
     123              : 
     124            1 : TEST_F(StringMacrosTest, SplitDefaultDelimiterIsComma) {
     125              :   // Edge case pinned: BigQuery SPLIT(value) (single-arg form)
     126              :   // defaults to splitting on `,`. A regression that registered
     127              :   // the macro without a `delimiter := ','` default would surface
     128              :   // here as a binder error (JoinSplit then returns "(rejected)").
     129            1 :   EXPECT_EQ(JoinSplit(conn_, "bq_split('a,b,c')"), "a|b|c");
     130            1 :   EXPECT_EQ(JoinSplit(conn_, "bq_split('one,two')"), "one|two");
     131            1 : }
     132              : 
     133            1 : TEST_F(StringMacrosTest, SplitCustomDelimiter) {  // Two-argument form: splits on the caller-supplied delimiter (`;` and `-` here).
     134            1 :   EXPECT_EQ(JoinSplit(conn_, "bq_split('a;b;c', ';')"), "a|b|c");
     135            1 :   EXPECT_EQ(JoinSplit(conn_, "bq_split('foo-bar-baz', '-')"), "foo|bar|baz");
     136            1 : }
     137              : 
     138            1 : TEST_F(StringMacrosTest, SplitEmptyInputReturnsSingleEmpty) {
     139              :   // Edge case pinned: BigQuery SPLIT('', ',') returns a list
     140              :   // containing one empty string; DuckDB's string_split agrees
     141              :   // today. NOTE(review): the joined form is presumably also ""
     142              :   // for an empty list, so a len() check would pin this tighter.
     143            1 :   EXPECT_EQ(JoinSplit(conn_, "bq_split('', ',')"), "");
     144            1 : }
     145              : 
     146            1 : TEST_F(StringMacrosTest, SplitNullPropagation) {
     147              :   // Wrap in a `<call> IS NULL` SQL boolean rather than relying on
     148              :   // `duckdb_value_is_null` for LIST cells; the C API's NULL
     149              :   // detector for LIST columns is shape-sensitive across DuckDB
     150              :   // versions, and a SQL-level `IS NULL` is unambiguous.
     151            1 :   ::duckdb_result result;
     152            1 :   for (const char* sql : {
     153            1 :            "SELECT bq_split(NULL::VARCHAR) IS NULL",
     154            1 :            "SELECT bq_split('a,b,c', NULL::VARCHAR) IS NULL",
     155            2 :        }) {
     156            2 :     auto rc = ::duckdb_query(conn_, sql, &result);
     157            4 :     ASSERT_EQ(rc, ::DuckDBSuccess) << "DuckDB rejected: " << sql;  // NOTE(review): a fatal failure here returns before duckdb_destroy_result below runs.
     158            4 :     EXPECT_TRUE(::duckdb_value_boolean(&result, 0, 0))
     159            4 :         << "expected NULL propagation for " << sql;
     160            2 :     ::duckdb_destroy_result(&result);
     161            2 :   }
     162            1 : }
     163              : 
     164              : }  // namespace
     165              : }  // namespace udf
     166              : }  // namespace duckdb
     167              : }  // namespace engine
     168              : }  // namespace backend
     169              : }  // namespace bigquery_emulator
        

Generated by: LCOV version 2.0-1