diff --git a/include/sql_parser/tokenizer.h b/include/sql_parser/tokenizer.h
index 5d593e7..2b27a14 100644
--- a/include/sql_parser/tokenizer.h
+++ b/include/sql_parser/tokenizer.h
@@ -225,22 +225,39 @@ class Tokenizer {
     }
 
     // MySQL: backtick-quoted identifier
+    // Unclosed backticks emit TK_ERROR so the parser can fail cleanly with
+    // ParseResult::ERROR -- otherwise the tokenizer would silently swallow
+    // the rest of the input as one giant identifier (e.g. `SET x = `foo`
+    // would become identifier `foo` followed by an unclosed-backtick scan
+    // that consumes everything to EOF as another identifier).
     Token scan_backtick_identifier() {
+        const char* open_pos = cursor_;
         ++cursor_;  // skip opening backtick
         const char* content_start = cursor_;
         while (cursor_ < end_ && *cursor_ != '`') ++cursor_;
+        if (cursor_ >= end_) {
+            return make_token(TokenType::TK_ERROR, open_pos, 1);
+        }
         uint32_t len = static_cast<uint32_t>(cursor_ - content_start);
-        if (cursor_ < end_) ++cursor_;  // skip closing backtick
+        ++cursor_;  // skip closing backtick
         return make_token(TokenType::TK_IDENTIFIER, content_start, len);
     }
 
     // PostgreSQL: double-quoted identifier
+    // Unclosed `"` emits TK_ERROR -- same rationale as scan_backtick_identifier
+    // above. `SET search_path = "unclosed_quote, public` would otherwise be
+    // treated as identifier `unclosed_quote, public` (commas, spaces and all),
+    // pass validation, and corrupt search_path with garbage.
     Token scan_double_quoted_identifier() {
+        const char* open_pos = cursor_;
         ++cursor_;  // skip opening quote
         const char* content_start = cursor_;
         while (cursor_ < end_ && *cursor_ != '"') ++cursor_;
+        if (cursor_ >= end_) {
+            return make_token(TokenType::TK_ERROR, open_pos, 1);
+        }
         uint32_t len = static_cast<uint32_t>(cursor_ - content_start);
-        if (cursor_ < end_) ++cursor_;  // skip closing quote
+        ++cursor_;  // skip closing quote
         return make_token(TokenType::TK_IDENTIFIER, content_start, len);
     }
 
diff --git a/tests/test_set.cpp b/tests/test_set.cpp
index 023d66b..e19a8cc 100644
--- a/tests/test_set.cpp
+++ b/tests/test_set.cpp
@@ -1075,6 +1075,43 @@ TEST(MySQLSet, DollarStillBreaksUnquotedIdent) {
     }
 }
 
+// An unclosed double-quote delimited identifier must fail the parse, not
+// silently swallow the rest of the input as one identifier. Without this,
+// `SET search_path = "unclosed_quote, public` parses as identifier
+// `unclosed_quote, public` (commas, spaces and all), which then passes
+// downstream validation and corrupts the stored value.
+TEST(PgSQLSetUnclosedQuote, DoubleQuoteUnterminatedIsError) {
+    Parser parser;  // NOTE(review): a dialect template argument may have been stripped here -- confirm
+    const char* sql = "SET search_path = \"unclosed_quote, public";
+    auto r = parser.parse(sql, strlen(sql));
+    EXPECT_EQ(r.status, ParseResult::ERROR);
+}
+
+// Same protection for MySQL backtick-delimited identifiers: an unclosed
+// backtick must error, not consume to EOF.
+TEST(MySQLSetUnclosedQuote, BacktickUnterminatedIsError) {
+    Parser parser;  // NOTE(review): a dialect template argument may have been stripped here -- confirm
+    const char* sql = "SET `unclosed_ident = 1";
+    auto r = parser.parse(sql, strlen(sql));
+    EXPECT_EQ(r.status, ParseResult::ERROR);
+}
+
+// Properly-closed delimited identifiers must still parse OK -- regression
+// guard against making the new error path too aggressive.
+TEST(PgSQLSetUnclosedQuote, ClosedDoubleQuoteStillOk) {
+    const char* stmt = "SET search_path = \"public\"";  // terminated "..." identifier
+    Parser parser;  // NOTE(review): a dialect template argument may have been stripped here -- confirm
+    const auto outcome = parser.parse(stmt, strlen(stmt));
+    EXPECT_EQ(outcome.status, ParseResult::OK);
+}
+
+TEST(MySQLSetUnclosedQuote, ClosedBacktickStillOk) {
+    const char* stmt = "SET `wait_timeout` = 1";  // terminated `...` identifier
+    Parser parser;  // NOTE(review): a dialect template argument may have been stripped here -- confirm
+    const auto outcome = parser.parse(stmt, strlen(stmt));
+    EXPECT_EQ(outcome.status, ParseResult::OK);
+}
+
 // ============================================================================
 // Post-1.0.4 audit follow-ups: PG non-GUC SET forms and value-preservation.
 // ============================================================================