Line data Source code
1 : #include "backend/engine/coordinator/sql_preprocess.h"
2 :
3 : #include <string>
4 :
5 : #include "absl/strings/match.h"
6 : #include "backend/catalog/create_function_util.h"
7 : #include "googlesql/public/analyzer.h"
8 : #include "googlesql/public/analyzer_options.h"
9 : #include "googlesql/public/analyzer_output.h"
10 : #include "googlesql/public/builtin_function_options.h"
11 : #include "googlesql/public/language_options.h"
12 : #include "googlesql/public/simple_catalog.h"
13 : #include "googlesql/public/types/type_factory.h"
14 : #include "googlesql/resolved_ast/resolved_ast.h"
15 : #include "googlesql/resolved_ast/resolved_node.h"
16 : #include "gtest/gtest.h"
17 :
18 : namespace bigquery_emulator {
19 : namespace backend {
20 : namespace engine {
21 : namespace coordinator {
22 : namespace {
23 :
24 5 : ::googlesql::AnalyzerOptions MakeOptions() {
25 5 : ::googlesql::LanguageOptions language;
26 5 : language.EnableMaximumLanguageFeatures();
27 5 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
28 5 : language.set_name_resolution_mode(::googlesql::NAME_RESOLUTION_DEFAULT);
29 5 : language.SetSupportsAllStatementKinds();
30 5 : ::googlesql::AnalyzerOptions options(language);
31 5 : options.set_error_message_mode(::googlesql::ERROR_MESSAGE_ONE_LINE);
32 5 : options.CreateDefaultArenasIfNotSet();
33 5 : return options;
34 5 : }
35 :
36 1 : TEST(SqlPreprocessTest, FromHexFixtureAnalyzesAfterPreprocess) {
37 1 : const std::string sql = R"(/*
38 1 : * Copyright 2020 Google LLC
39 1 : *
40 1 : * Licensed under the Apache License, Version 2.0 (the "License");
41 1 : * you may not use this file except in compliance with the License.
42 1 : * You may obtain a copy of the License at
43 1 : *
44 1 : * http://www.apache.org/licenses/LICENSE-2.0
45 1 : *
46 1 : * Unless required by applicable law or agreed to in writing, software
47 1 : * distributed under the License is distributed on an "AS IS" BASIS,
48 1 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
49 1 : * See the License for the specific language governing permissions and
50 1 : * limitations under the License.
51 1 : */
52 1 :
53 1 : -- from_hex:
54 1 : CREATE FUNCTION from_hex(value STRING)
55 1 :
56 1 : AS
57 1 : (
58 1 : (
59 1 : SELECT
60 1 : SUM(
61 1 : CAST(
62 1 : CONCAT('0x', SUBSTR(value, byte * 2 + 1, 2))
63 1 : AS INT64) << ((LENGTH(value) - (byte + 1) * 2) * 4))
64 1 : FROM UNNEST(GENERATE_ARRAY(1, LENGTH(value) / 2)) WITH OFFSET byte
65 1 : )
66 1 : );)";
67 :
68 1 : const std::string once = PreprocessSqlForAnalyzer(sql);
69 1 : const std::string twice = PreprocessSqlForAnalyzer(once);
70 1 : EXPECT_EQ(once, twice);
71 :
72 1 : ::googlesql::TypeFactory type_factory;
73 1 : ::googlesql::SimpleCatalog catalog("test", &type_factory);
74 1 : ::googlesql::LanguageOptions language;
75 1 : language.EnableMaximumLanguageFeatures();
76 1 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
77 1 : ASSERT_TRUE(catalog
78 1 : .AddBuiltinFunctionsAndTypes(
79 1 : ::googlesql::BuiltinFunctionOptions(language))
80 1 : .ok());
81 :
82 1 : ::googlesql::AnalyzerOptions options = MakeOptions();
83 1 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
84 1 : absl::Status analyzed = ::googlesql::AnalyzeStatement(
85 1 : twice, options, &catalog, &type_factory, &output);
86 2 : EXPECT_TRUE(analyzed.ok()) << analyzed << "\npreprocessed:\n" << twice;
87 1 : }
88 :
89 1 : TEST(SqlPreprocessTest, FromHexFunctionBodyAnalyzesAfterPreprocess) {
90 1 : const std::string body = R"((
91 1 : (
92 1 : SELECT
93 1 : SUM(
94 1 : CAST(
95 1 : CONCAT('0x', SUBSTR(value, byte * 2 + 1, 2))
96 1 : AS INT64) << ((LENGTH(value) - (byte + 1) * 2) * 4))
97 1 : FROM UNNEST(GENERATE_ARRAY(1, LENGTH(value) / 2)) WITH OFFSET byte
98 1 : )
99 1 : );)";
100 :
101 1 : const std::string once = PreprocessFunctionBodyForAnalyzer(body);
102 1 : const std::string twice = PreprocessFunctionBodyForAnalyzer(once);
103 1 : EXPECT_EQ(once, twice);
104 2 : EXPECT_TRUE(absl::StartsWith(once, "((")) << once;
105 2 : EXPECT_TRUE(absl::EndsWith(once, "))")) << once;
106 2 : EXPECT_EQ(once.find(';'), std::string::npos) << once;
107 1 : }
108 :
109 1 : TEST(SqlPreprocessTest, FromHexFixtureRegistersAfterPreprocess) {
110 1 : const std::string sql = R"(/*
111 1 : * Copyright 2020 Google LLC
112 1 : *
113 1 : * Licensed under the Apache License, Version 2.0 (the "License");
114 1 : * you may not use this file except in compliance with the License.
115 1 : * You may obtain a copy of the License at
116 1 : *
117 1 : * http://www.apache.org/licenses/LICENSE-2.0
118 1 : *
119 1 : * Unless required by applicable law or agreed to in writing, software
120 1 : * distributed under the License is distributed on an "AS IS" BASIS,
121 1 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
122 1 : * See the License for the specific language governing permissions and
123 1 : * limitations under the License.
124 1 : */
125 1 :
126 1 : -- from_hex:
127 1 : -- Input: STRING representing a number in hexadecimal form
128 1 : -- Output: INT64 number in decimal form
129 1 : CREATE FUNCTION from_hex(value STRING)
130 1 :
131 1 : AS
132 1 : (
133 1 : (
134 1 : SELECT
135 1 : SUM(
136 1 : CAST(
137 1 : CONCAT('0x', SUBSTR(value, byte * 2 + 1, 2))
138 1 : AS INT64) << ((LENGTH(value) - (byte + 1) * 2) * 4))
139 1 : FROM UNNEST(GENERATE_ARRAY(1, LENGTH(value) / 2)) WITH OFFSET byte
140 1 : )
141 1 : );)";
142 :
143 1 : const std::string preprocessed = PreprocessSqlForAnalyzer(sql);
144 :
145 1 : ::googlesql::TypeFactory type_factory;
146 1 : ::googlesql::SimpleCatalog catalog("test", &type_factory);
147 1 : ::googlesql::LanguageOptions language;
148 1 : language.EnableMaximumLanguageFeatures();
149 1 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
150 1 : ASSERT_TRUE(catalog
151 1 : .AddBuiltinFunctionsAndTypes(
152 1 : ::googlesql::BuiltinFunctionOptions(language))
153 1 : .ok());
154 :
155 1 : ::googlesql::AnalyzerOptions options = MakeOptions();
156 1 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
157 1 : ASSERT_TRUE(::googlesql::AnalyzeStatement(
158 1 : preprocessed, options, &catalog, &type_factory, &output)
159 1 : .ok());
160 1 : const auto* create_fn =
161 1 : output->resolved_statement()
162 1 : ->GetAs<::googlesql::ResolvedCreateFunctionStmt>();
163 1 : ASSERT_NE(create_fn, nullptr);
164 1 : const std::string code = create_fn->code();
165 2 : EXPECT_FALSE(code.empty()) << "preprocessed:\n" << preprocessed;
166 2 : EXPECT_NE(code[0], ')') << "code():\n" << code;
167 1 : absl::StatusOr<std::unique_ptr<const ::googlesql::Function>> fn =
168 1 : bigquery_emulator::backend::catalog::MakeFunctionFromCreateFunction(
169 1 : *create_fn, /*function_options=*/nullptr);
170 2 : EXPECT_TRUE(fn.ok()) << fn.status() << "\ncode():\n" << code;
171 1 : }
172 :
173 1 : TEST(SqlPreprocessTest, CwToBaseWithBodyAnalyzesAfterPreprocess) {
174 1 : const std::string sql =
175 1 : R"(CREATE FUNCTION cw_to_base(number INT64, base INT64) RETURNS STRING
176 1 :
177 1 : AS (
178 1 : (WITH chars AS (
179 1 : SELECT 1 AS ch, 1 AS idx
180 1 : )
181 1 : SELECT 'x' AS to_base FROM chars)
182 1 : );)";
183 :
184 1 : const std::string once = PreprocessSqlForAnalyzer(sql);
185 1 : const std::string twice = PreprocessSqlForAnalyzer(once);
186 1 : EXPECT_EQ(once, twice);
187 2 : EXPECT_TRUE(absl::StrContains(once, "AS (( WITH")) << once;
188 :
189 1 : ::googlesql::TypeFactory type_factory;
190 1 : ::googlesql::SimpleCatalog catalog("test", &type_factory);
191 1 : ::googlesql::LanguageOptions language;
192 1 : language.EnableMaximumLanguageFeatures();
193 1 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
194 1 : ASSERT_TRUE(catalog
195 1 : .AddBuiltinFunctionsAndTypes(
196 1 : ::googlesql::BuiltinFunctionOptions(language))
197 1 : .ok());
198 :
199 1 : ::googlesql::AnalyzerOptions options = MakeOptions();
200 1 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
201 1 : absl::Status analyzed = ::googlesql::AnalyzeStatement(
202 1 : twice, options, &catalog, &type_factory, &output);
203 2 : EXPECT_TRUE(analyzed.ok()) << analyzed << "\npreprocessed:\n" << twice;
204 1 : }
205 :
206 1 : TEST(SqlPreprocessTest, CwFromBaseFixtureAnalyzesAfterPreprocess) {
207 1 : const std::string sql =
208 1 : R"(CREATE FUNCTION cw_from_base(number STRING, base INT64) RETURNS INT64
209 1 :
210 1 : AS (
211 1 : (WITH chars AS (
212 1 : SELECT IF(ch >= 48 AND ch <= 57, ch - 48, IF(ch >= 65 AND ch <= 90, ch - 65 + 10, ch - 97 + 10)) pos, offset + 1 AS idx
213 1 : FROM UNNEST(TO_CODE_POINTS(number)) AS ch WITH OFFSET
214 1 : )
215 1 : SELECT SAFE_CAST(SUM(pos*CAST(POW(base, CHAR_LENGTH(number) - idx) AS NUMERIC)) AS INT64) from_base FROM chars)
216 1 : );)";
217 :
218 1 : const std::string preprocessed = PreprocessSqlForAnalyzer(sql);
219 1 : const std::string twice = PreprocessSqlForAnalyzer(preprocessed);
220 1 : EXPECT_EQ(preprocessed, twice);
221 2 : EXPECT_TRUE(absl::StrContains(preprocessed, "AS (( WITH")) << preprocessed;
222 :
223 1 : ::googlesql::TypeFactory type_factory;
224 1 : ::googlesql::SimpleCatalog catalog("test", &type_factory);
225 1 : ::googlesql::LanguageOptions language;
226 1 : language.EnableMaximumLanguageFeatures();
227 1 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
228 1 : ASSERT_TRUE(catalog
229 1 : .AddBuiltinFunctionsAndTypes(
230 1 : ::googlesql::BuiltinFunctionOptions(language))
231 1 : .ok());
232 :
233 1 : ::googlesql::AnalyzerOptions options = MakeOptions();
234 1 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
235 2 : ASSERT_TRUE(::googlesql::AnalyzeStatement(
236 2 : preprocessed, options, &catalog, &type_factory, &output)
237 2 : .ok())
238 2 : << preprocessed;
239 1 : }
240 :
241 1 : TEST(SqlPreprocess, RewritesIntegerTypeAliasInQuery) {
242 1 : const std::string sql =
243 1 : "SELECT STRUCT(CAST(2.89 AS FLOAT64) AS t_value, CAST(21 AS INTEGER) AS "
244 1 : "dof)";
245 1 : const std::string out = PreprocessSqlForAnalyzer(sql);
246 2 : EXPECT_TRUE(absl::StrContains(out, "21 AS dof")) << out;
247 2 : EXPECT_FALSE(absl::StrContains(out, "INTEGER")) << out;
248 2 : EXPECT_FALSE(absl::StrContains(out, "CAST(21 AS")) << out;
249 1 : }
250 :
251 1 : TEST(SqlPreprocess, RewritesFormatTypeLiteralStandalone) {
252 1 : const std::string sql = "FORMAT('%T', input)";
253 1 : const std::string out = PreprocessSqlForAnalyzer(sql);
254 1 : EXPECT_EQ("emu_format_t(input)", out);
255 1 : }
256 :
257 1 : TEST(SqlPreprocess, PreservesDoubleQuotedStringLiterals) {
258 1 : const std::string sql =
259 1 : R"(SELECT "FirstLine\nSeattleKirkland\nAnotherLine" LIKE 'F%Seattle%Line')";
260 1 : const std::string out = PreprocessSqlForAnalyzer(sql);
261 1 : EXPECT_EQ(sql, out);
262 1 : }
263 :
264 1 : TEST(SqlPreprocess, PreservesSingleQuotedStringLiterals) {
265 1 : const std::string sql = "SELECT 'SeattleKirkland' LIKE 'Seattle%'";
266 1 : const std::string out = PreprocessSqlForAnalyzer(sql);
267 1 : EXPECT_EQ(sql, out);
268 1 : }
269 :
270 1 : TEST(SqlPreprocess, RewritesStructInt64LiteralCastsInTTestQuery) {
271 1 : const std::string sql =
272 1 : "SELECT TO_JSON_STRING(STRUCT(CAST(2.8957935572829476 AS FLOAT64) AS "
273 1 : "t_value, CAST(21 AS INTEGER) AS dof))";
274 1 : const std::string out = PreprocessSqlForAnalyzer(sql);
275 2 : EXPECT_TRUE(
276 2 : absl::StrContains(out, "CAST(2.8957935572829476 AS FLOAT64) AS t_value"))
277 2 : << out;
278 2 : EXPECT_TRUE(absl::StrContains(out, "21 AS dof")) << out;
279 2 : EXPECT_FALSE(absl::StrContains(out, "CAST(21 AS")) << out;
280 :
281 1 : ::googlesql::TypeFactory type_factory;
282 1 : ::googlesql::SimpleCatalog catalog("test", &type_factory);
283 1 : ::googlesql::LanguageOptions language;
284 1 : language.EnableMaximumLanguageFeatures();
285 1 : language.set_product_mode(::googlesql::PRODUCT_EXTERNAL);
286 1 : ASSERT_TRUE(catalog
287 1 : .AddBuiltinFunctionsAndTypes(
288 1 : ::googlesql::BuiltinFunctionOptions(language))
289 1 : .ok());
290 1 : ::googlesql::AnalyzerOptions options = MakeOptions();
291 1 : std::unique_ptr<const ::googlesql::AnalyzerOutput> output;
292 2 : EXPECT_TRUE(::googlesql::AnalyzeStatement(
293 2 : out, options, &catalog, &type_factory, &output)
294 2 : .ok())
295 2 : << out;
296 1 : }
297 :
298 : } // namespace
299 : } // namespace coordinator
300 : } // namespace engine
301 : } // namespace backend
302 : } // namespace bigquery_emulator
|