Line data Source code
1 : #include "backend/engine/duckdb/transpiler/transpiler_test_fixture.h"
2 :
3 : namespace bigquery_emulator {
4 : namespace backend {
5 : namespace engine {
6 : namespace duckdb {
7 : namespace transpiler {
8 :
9 : // --- Window / Analytic --------------------------------------------------
10 :
11 1 : TEST_F(TranspilerTest, EmitAnalyticScanRowNumber) {
12 : // `ROW_NUMBER() OVER (ORDER BY id)` lowers to a ResolvedAnalyticScan
13 : // whose only group has a null partition_by and a single-item
14 : // order_by; the analytic function list carries one
15 : // ResolvedAnalyticFunctionCall (`row_number`, no args). The
16 : // synthesized output column is `$analytic1`.
17 1 : const ::googlesql::ResolvedStatement* stmt =
18 1 : Analyze("SELECT ROW_NUMBER() OVER (ORDER BY id) FROM people");
19 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
20 1 : ASSERT_NE(scan, nullptr);
21 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_ANALYTIC_SCAN);
22 1 : TestTranspiler t;
23 1 : const std::string kPeopleWithRn =
24 1 : "SELECT *, row_number() OVER () AS \"__bq_input_rn\" FROM (SELECT "
25 1 : "\"id\", \"name\" FROM \"people\")";
26 1 : EXPECT_EQ(
27 1 : t.EmitAnalyticScan(scan->GetAs<::googlesql::ResolvedAnalyticScan>()),
28 1 : "SELECT *, ROW_NUMBER() OVER (ORDER BY \"id\" ASC NULLS FIRST) AS "
29 1 : "\"$analytic1\" FROM (" +
30 1 : kPeopleWithRn + ")");
31 1 : }
32 :
33 1 : TEST_F(TranspilerTest, EmitAnalyticScanRankPartitionByOrderBy) {
34 : // `RANK() OVER (PARTITION BY name ORDER BY id DESC)` exercises both
35 : // the partition_by and the explicit-direction order_by paths. The
36 : // partition spec emits one PARTITION BY column and the order spec
37 : // emits the explicit DESC keyword.
38 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
39 1 : "SELECT RANK() OVER (PARTITION BY name ORDER BY id DESC) FROM people");
40 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
41 1 : ASSERT_NE(scan, nullptr);
42 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_ANALYTIC_SCAN);
43 1 : TestTranspiler t;
44 1 : const std::string kPeopleWithRn =
45 1 : "SELECT *, row_number() OVER () AS \"__bq_input_rn\" FROM (SELECT "
46 1 : "\"id\", \"name\" FROM \"people\")";
47 1 : EXPECT_EQ(
48 1 : t.EmitAnalyticScan(scan->GetAs<::googlesql::ResolvedAnalyticScan>()),
49 1 : "SELECT *, RANK() OVER (PARTITION BY \"name\" ORDER BY \"id\" DESC "
50 1 : "NULLS LAST) AS \"$analytic1\" FROM (" +
51 1 : kPeopleWithRn + ")");
52 1 : }
53 :
54 1 : TEST_F(TranspilerTest, EmitAnalyticScanDenseRank) {
55 : // DENSE_RANK is the third ranking analytic the plan calls out; the
56 : // test mirrors RANK so we get explicit coverage of the disposition
57 : // row in `functions.yaml` (`dense_rank: DENSE_RANK`).
58 1 : const ::googlesql::ResolvedStatement* stmt =
59 1 : Analyze("SELECT DENSE_RANK() OVER (ORDER BY id) FROM people");
60 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
61 1 : ASSERT_NE(scan, nullptr);
62 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_ANALYTIC_SCAN);
63 1 : TestTranspiler t;
64 1 : const std::string kPeopleWithRn =
65 1 : "SELECT *, row_number() OVER () AS \"__bq_input_rn\" FROM (SELECT "
66 1 : "\"id\", \"name\" FROM \"people\")";
67 1 : EXPECT_EQ(
68 1 : t.EmitAnalyticScan(scan->GetAs<::googlesql::ResolvedAnalyticScan>()),
69 1 : "SELECT *, DENSE_RANK() OVER (ORDER BY \"id\" ASC NULLS FIRST) AS "
70 1 : "\"$analytic1\" FROM (" +
71 1 : kPeopleWithRn + ")");
72 1 : }
73 :
74 1 : TEST_F(TranspilerTest, EmitAnalyticScanSumOverWithFrame) {
75 : // Aggregate-over-window with an explicit ROWS frame. SUM is a
76 : // `kDuckdbNative` entry shared with the scalar aggregate emit, so
77 : // the
78 : // analytic path renders it the same way (`SUM(<expr>)`) and the
79 : // OVER clause carries the ROWS BETWEEN bound. UNBOUNDED PRECEDING
80 : // / CURRENT ROW are both supported boundary types.
81 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
82 1 : "SELECT SUM(id) OVER (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING "
83 1 : "AND CURRENT ROW) FROM people");
84 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
85 1 : ASSERT_NE(scan, nullptr);
86 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_ANALYTIC_SCAN);
87 1 : TestTranspiler t;
88 1 : const std::string kPeopleWithRn =
89 1 : "SELECT *, row_number() OVER () AS \"__bq_input_rn\" FROM (SELECT "
90 1 : "\"id\", \"name\" FROM \"people\")";
91 1 : EXPECT_EQ(
92 1 : t.EmitAnalyticScan(scan->GetAs<::googlesql::ResolvedAnalyticScan>()),
93 1 : "SELECT *, SUM(\"id\") OVER (ORDER BY \"id\" ASC NULLS FIRST ROWS "
94 1 : "BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS \"$analytic1\" FROM (" +
95 1 : kPeopleWithRn + ")");
96 1 : }
97 :
98 1 : TEST_F(TranspilerTest, EmitAnalyticScanCountStarOverPartition) {
99 : // COUNT(*) lowers through the `$count_star` special case both in
100 : // the aggregate path and in the analytic path -- the analyzer
101 : // gives us an empty argument_list and the function name
102 : // `$count_star`. With a PARTITION-BY-only OVER clause the analyzer
103 : // synthesizes a `ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED
104 : // FOLLOWING` frame for aggregate analytic functions, so the emit
105 : // surfaces that frame even though the user didn't spell it.
106 1 : const ::googlesql::ResolvedStatement* stmt =
107 1 : Analyze("SELECT COUNT(*) OVER (PARTITION BY name) FROM people");
108 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
109 1 : ASSERT_NE(scan, nullptr);
110 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_ANALYTIC_SCAN);
111 1 : TestTranspiler t;
112 1 : const std::string kPeopleWithRn =
113 1 : "SELECT *, row_number() OVER () AS \"__bq_input_rn\" FROM (SELECT "
114 1 : "\"id\", \"name\" FROM \"people\")";
115 1 : EXPECT_EQ(
116 1 : t.EmitAnalyticScan(scan->GetAs<::googlesql::ResolvedAnalyticScan>()),
117 1 : "SELECT *, COUNT(*) OVER (PARTITION BY \"name\" ROWS BETWEEN "
118 1 : "UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS \"$analytic1\" FROM (" +
119 1 : kPeopleWithRn + ")");
120 1 : }
121 :
122 1 : TEST_F(TranspilerTest, EmitAnalyticScanSafeAggregateFallsBack) {
123 : // `SAFE.SUM(id) OVER (ORDER BY id)` analyzes cleanly (SAFE is a
124 : // function-call decoration, not an OVER-time modifier) but sets
125 : // `error_mode = SAFE_ERROR_MODE`. The per-call SAFE short-circuit
126 : // returns "" and the analytic emit propagates the empty string,
127 : // so the engine surfaces UNIMPLEMENTED.
128 1 : const ::googlesql::ResolvedStatement* stmt =
129 1 : Analyze("SELECT SAFE.SUM(id) OVER (ORDER BY id) FROM people");
130 1 : if (stmt == nullptr) {
131 0 : GTEST_SKIP() << "analyzer rejected SAFE aggregate OVER -- skip";
132 0 : }
133 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
134 1 : if (scan == nullptr ||
135 1 : scan->node_kind() != ::googlesql::RESOLVED_ANALYTIC_SCAN) {
136 0 : GTEST_SKIP() << "analyzer produced non-analytic scan -- skip";
137 0 : }
138 1 : TestTranspiler t;
139 1 : EXPECT_EQ(
140 1 : t.EmitAnalyticScan(scan->GetAs<::googlesql::ResolvedAnalyticScan>()), "");
141 1 : }
142 :
143 1 : TEST_F(TranspilerTest, EmitAnalyticScanFirstValueIgnoreNulls) {
144 1 : const ::googlesql::ResolvedStatement* stmt = Analyze(
145 1 : "SELECT FIRST_VALUE(id IGNORE NULLS) OVER (ORDER BY id) FROM people");
146 1 : const ::googlesql::ResolvedScan* scan = QueryInputScan(stmt);
147 1 : ASSERT_NE(scan, nullptr);
148 1 : ASSERT_EQ(scan->node_kind(), ::googlesql::RESOLVED_ANALYTIC_SCAN);
149 1 : TestTranspiler t;
150 1 : std::string sql =
151 1 : t.EmitAnalyticScan(scan->GetAs<::googlesql::ResolvedAnalyticScan>());
152 2 : EXPECT_NE(sql.find("FIRST_VALUE(\"id\" IGNORE NULLS) OVER"),
153 2 : std::string::npos)
154 2 : << sql;
155 1 : }
156 :
157 : // --- Top-level SELECT (QueryStmt / ProjectScan / SingleRowScan /
158 : // OutputColumn / ComputedColumn) -----------------------------------
159 :
160 : } // namespace transpiler
161 : } // namespace duckdb
162 : } // namespace engine
163 : } // namespace backend
164 : } // namespace bigquery_emulator
|