From 0c32f5ebbb9551a03cfda52114be48516ef80bb7 Mon Sep 17 00:00:00 2001 From: Rene Cannao Date: Sat, 23 May 2026 02:19:53 +0000 Subject: [PATCH] fix(set): handle PostgreSQL SET TIME ZONE alias and multi-value lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two PG SET parser bugs surfaced when the algorithm=3 (ParserSQL) SET path was first actually exercised in ProxySQL CI: ## Bug 1: SET TIME ZONE mis-parsed Per the PostgreSQL docs, `SET TIME ZONE ` is an alias for `SET TimeZone = `. The tokenizer has no dedicated TK_TIME or TK_ZONE keyword, so the input was lexed as IDENTIFIER("TIME") followed by IDENTIFIER("ZONE"). `parse_variable_assignment()` saw "TIME" as the variable target and "ZONE" as the unquoted RHS identifier value, producing the assignment `time = ZONE`, and any trailing value (`'UTC'`, `DEFAULT`, an INTERVAL expression, etc.) was silently dropped on the floor. Fix: at the top of `parse()`, in PostgreSQL dialect, peek for IDENTIFIER("TIME") followed by IDENTIFIER("ZONE") (case-insensitive via StringRef::equals_ci) and synthesise a `timezone = ` assignment. The RHS is `DEFAULT` / `LOCAL` if those keywords are the next token, otherwise the expression parser's result. ## Bug 2: PG SET multi-value list dropped after the first value Per the PostgreSQL docs: SET configuration_parameter { TO | = } { value | 'value' | DEFAULT } "Some configuration parameters take a list of values, such as search_path and datestyle." `parse()` was sharing the MySQL-style comma loop, where each comma introduces a *new* variable assignment. For PG that is wrong — the comma is value-list continuation for the SAME variable. Given `SET search_path TO "$user", public`, the loop tried to parse `public` as a new assignment, found no `=`/`TO` after it, returned nullptr, and the second value disappeared. Fix: dialect-split the post-assignment comma loop. MySQL keeps the existing behaviour (`parse_comma_item()` for each comma). PostgreSQL appends each subsequent expression as an additional child of the SAME `NODE_VAR_ASSIGNMENT` node (alongside the first RHS). Downstream consumers (e.g. ProxySQL's `walk_set_stmt` adapter) walk all `target->next_sibling`s to collect the full value list. ## Test coverage `tests/test_set.cpp` gains: - 9 entries in `pgsql_set_cases` for the new SET shapes (bulk parse-success coverage) - 5 structural `TEST_F(PgSQLSetTest, SetTimeZone...)` blocks verifying the variable name resolves to `timezone` - 4 structural `TEST_F(PgSQLSetTest, SetSearchPath...)` / `SetDatestyleMultiValue` blocks verifying the assignment node has target + N value children (instead of target + 1) Full test suite: 1223/1223 pass; 0 new failures. Reported via ProxySQL CI baseline failures in TAP group set_parser_algorithm_3-g1 (tests pgsql-set_parameter_validation_test-t, pgsql-set_statement_test-t) — both now produce the expected parser output. --- include/sql_parser/set_parser.h | 61 +++++++++++++- tests/test_set.cpp | 144 ++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+), 4 deletions(-) diff --git a/include/sql_parser/set_parser.h b/include/sql_parser/set_parser.h index ce3b9f6..216f44d 100644 --- a/include/sql_parser/set_parser.h +++ b/include/sql_parser/set_parser.h @@ -111,13 +111,66 @@ class SetParser { } } + // PostgreSQL: SET TIME ZONE + // Per the PG docs this is an alias for SET TimeZone = . The + // tokenizer has no dedicated TK_TIME / TK_ZONE keywords so the lookahead + // matches identifier text case-insensitively. can be a string + // literal, DEFAULT, LOCAL, an interval expression, or any other + // expression accepted by ExpressionParser. + if constexpr (D == Dialect::PostgreSQL) { + if (next.type == TokenType::TK_IDENTIFIER && next.text.equals_ci("TIME", 4)) { + tok_.skip(); + Token zone = tok_.peek(); + if (zone.type == TokenType::TK_IDENTIFIER && zone.text.equals_ci("ZONE", 4)) { + tok_.skip(); + AstNode* assignment = make_node(arena_, NodeType::NODE_VAR_ASSIGNMENT); + AstNode* target = make_node(arena_, NodeType::NODE_VAR_TARGET); + if (!assignment || !target) return nullptr; + // Synthetic variable name; string literal lives in static storage. + target->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, + StringRef{"timezone", 8})); + assignment->add_child(target); + Token rhs_tok = tok_.peek(); + if (rhs_tok.type == TokenType::TK_DEFAULT || + rhs_tok.type == TokenType::TK_LOCAL) { + tok_.skip(); + assignment->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, rhs_tok.text)); + } else { + AstNode* rhs = expr_parser_.parse(); + if (!rhs) return nullptr; + assignment->add_child(rhs); + } + root->add_child(assignment); + return root; + } + // TIME without ZONE is not a valid PG SET form. + return nullptr; + } + } + // SET var = expr [, var = expr, ...] AstNode* assignment = parse_variable_assignment(nullptr); if (assignment) root->add_child(assignment); - while (tok_.peek().type == TokenType::TK_COMMA) { - tok_.skip(); - AstNode* next_assign = parse_comma_item(); - if (next_assign) root->add_child(next_assign); + if constexpr (D == Dialect::PostgreSQL) { + // PG SET is single-variable; commas after the first value are list + // continuation, not new assignments (see PG docs: + // SET configuration_parameter { TO | = } { value | 'value' | DEFAULT } + // "Some configuration parameters take a list of values, such as + // search_path and datestyle.") + // Each extra value is appended as another child of the same + // VAR_ASSIGNMENT node, alongside the first RHS expression. + while (assignment && tok_.peek().type == TokenType::TK_COMMA) { + tok_.skip(); + AstNode* extra_val = expr_parser_.parse(); + if (!extra_val) break; + assignment->add_child(extra_val); + } + } else { + while (tok_.peek().type == TokenType::TK_COMMA) { + tok_.skip(); + AstNode* next_assign = parse_comma_item(); + if (next_assign) root->add_child(next_assign); + } } if (!root->first_child) return nullptr; diff --git a/tests/test_set.cpp b/tests/test_set.cpp index 639e191..67796f4 100644 --- a/tests/test_set.cpp +++ b/tests/test_set.cpp @@ -200,6 +200,17 @@ static const SetTestCase pgsql_set_cases[] = { {"SET LOCAL timezone = 'UTC'", "PG LOCAL timezone"}, {"SET NAMES 'UTF8'", "PG NAMES"}, {"SET search_path TO public, extensions", "PG search_path TO list"}, + // PG SET TIME ZONE — two-keyword alias for SET TimeZone = . + {"SET TIME ZONE 'UTC'", "PG SET TIME ZONE string literal"}, + {"SET TIME ZONE DEFAULT", "PG SET TIME ZONE DEFAULT"}, + {"SET TIME ZONE '+05:30'", "PG SET TIME ZONE numeric offset literal"}, + {"SET TIME ZONE LOCAL", "PG SET TIME ZONE LOCAL"}, + {"set time zone 'UTC'", "PG SET TIME ZONE lowercase"}, + // PG SET multi-value list — single variable, commas are value continuation. + {"SET search_path TO 'a', 'b'", "PG search_path two-value list via TO"}, + {"SET search_path = 'a', 'b', 'c'", "PG search_path three-value list via ="}, + {"SET search_path TO \"$user\", public", "PG search_path with quoted identifier + plain identifier"}, + {"SET datestyle TO 'ISO', 'mdy'", "PG datestyle two-value list"}, }; // ============================================================================ @@ -644,6 +655,139 @@ TEST_F(PgSQLSetTest, SetSearchPathToList) { ASSERT_NE(r.ast, nullptr); } +// ---------------------------------------------------------------------------- +// PG SET TIME ZONE — two-keyword alias for SET TimeZone = . +// Regression coverage: before the fix, the parser treated TIME as a normal +// identifier (variable name "time") and ZONE as the value, so the assignment +// emitted was `time = ZONE` and the rest of the statement was discarded. +// ---------------------------------------------------------------------------- + +namespace { + +// Helper: walk to the variable-target identifier node, return its StringRef. +inline StringRef set_var_name(const AstNode* root) { + if (!root || !root->first_child) return {}; + const AstNode* assignment = root->first_child; + if (!assignment || !assignment->first_child) return {}; + const AstNode* target = assignment->first_child; + if (!target || !target->first_child) return {}; + return target->first_child->value(); +} + +// Helper: count children of a node. +inline int child_count(const AstNode* node) { + int n = 0; + for (const AstNode* c = node->first_child; c; c = c->next_sibling) ++n; + return n; +} + +} // namespace + +TEST_F(PgSQLSetTest, SetTimeZoneStringLiteral) { + const char* sql = "SET TIME ZONE 'UTC'"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + EXPECT_EQ(r.ast->type, NodeType::NODE_SET_STMT); + AstNode* assignment = r.ast->first_child; + ASSERT_NE(assignment, nullptr); + EXPECT_EQ(assignment->type, NodeType::NODE_VAR_ASSIGNMENT); + EXPECT_TRUE(set_var_name(r.ast).equals_ci("timezone", 8)) + << "Expected variable name 'timezone' (PG alias for SET TIME ZONE)"; + AstNode* target = assignment->first_child; + ASSERT_NE(target->next_sibling, nullptr) << "Missing RHS value"; +} + +TEST_F(PgSQLSetTest, SetTimeZoneDefault) { + const char* sql = "SET TIME ZONE DEFAULT"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + EXPECT_TRUE(set_var_name(r.ast).equals_ci("timezone", 8)); +} + +TEST_F(PgSQLSetTest, SetTimeZoneNumericOffset) { + const char* sql = "SET TIME ZONE '+05:30'"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + EXPECT_TRUE(set_var_name(r.ast).equals_ci("timezone", 8)); +} + +TEST_F(PgSQLSetTest, SetTimeZoneLocal) { + const char* sql = "SET TIME ZONE LOCAL"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + EXPECT_TRUE(set_var_name(r.ast).equals_ci("timezone", 8)); +} + +TEST_F(PgSQLSetTest, SetTimeZoneLowercase) { + const char* sql = "set time zone 'UTC'"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + EXPECT_TRUE(set_var_name(r.ast).equals_ci("timezone", 8)); +} + +// ---------------------------------------------------------------------------- +// PG SET multi-value list — PG SET is single-variable; commas after the +// first value are continuation of the value list, NOT new variable +// assignments. Per PG docs: +// SET configuration_parameter { TO | = } { value | 'value' | DEFAULT } +// "Some configuration parameters take a list of values, such as +// search_path and datestyle." +// Regression coverage: before the fix, the parser treated each comma as a +// new assignment, so the second value was tested as a new variable target +// without `=` and silently dropped. +// ---------------------------------------------------------------------------- + +TEST_F(PgSQLSetTest, SetSearchPathMultiValueTo) { + const char* sql = "SET search_path TO 'a', 'b'"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + AstNode* assignment = r.ast->first_child; + ASSERT_NE(assignment, nullptr); + EXPECT_EQ(assignment->type, NodeType::NODE_VAR_ASSIGNMENT); + EXPECT_EQ(child_count(assignment), 3) + << "Expected target + 2 value children under the same assignment"; + EXPECT_EQ(assignment->next_sibling, nullptr) + << "PG comma after first value is list continuation, not a new assignment"; +} + +TEST_F(PgSQLSetTest, SetSearchPathMultiValueEq) { + const char* sql = "SET search_path = 'a', 'b', 'c'"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + AstNode* assignment = r.ast->first_child; + ASSERT_NE(assignment, nullptr); + EXPECT_EQ(child_count(assignment), 4) << "target + 3 values"; + EXPECT_EQ(assignment->next_sibling, nullptr); +} + +TEST_F(PgSQLSetTest, SetSearchPathQuotedIdentifierAndPlain) { + // The customer-facing failure shape from pgsql-set_parameter_validation_test-t. + const char* sql = "SET search_path TO \"$user\", public"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + AstNode* assignment = r.ast->first_child; + ASSERT_NE(assignment, nullptr); + EXPECT_EQ(child_count(assignment), 3) << "target + 2 values"; +} + +TEST_F(PgSQLSetTest, SetDatestyleMultiValue) { + const char* sql = "SET datestyle TO 'ISO', 'mdy'"; + auto r = parser.parse(sql, strlen(sql)); + EXPECT_EQ(r.status, ParseResult::OK); + ASSERT_NE(r.ast, nullptr); + AstNode* assignment = r.ast->first_child; + ASSERT_NE(assignment, nullptr); + EXPECT_EQ(child_count(assignment), 3) << "target + 2 values"; +} + // ============================================================================ // Invalid syntax should return PARTIAL, not OK (issue #36) // ============================================================================