77#include " sql_parser/ast.h"
88#include " sql_parser/arena.h"
99#include " sql_parser/expression_parser.h"
10+ #include " sql_parser/table_ref_parser.h"
1011
1112namespace sql_parser {
1213
1314template <Dialect D>
1415class SelectParser {
1516public:
1617 SelectParser (Tokenizer<D>& tokenizer, Arena& arena)
17- : tok_(tokenizer), arena_(arena), expr_parser_(tokenizer, arena) {}
18+ : tok_(tokenizer), arena_(arena), expr_parser_(tokenizer, arena),
19+ table_ref_parser_ (tokenizer, arena, expr_parser_) {}
1820
1921 // Parse a SELECT statement (SELECT keyword already consumed by classifier).
2022 AstNode* parse () {
@@ -34,7 +36,7 @@ class SelectParser {
3436 // FROM clause
3537 if (tok_.peek ().type == TokenType::TK_FROM ) {
3638 tok_.skip ();
37- AstNode* from = parse_from_clause ();
39+ AstNode* from = table_ref_parser_. parse_from_clause ();
3840 if (from) root->add_child (from);
3941 }
4042
@@ -96,6 +98,7 @@ class SelectParser {
9698 Tokenizer<D>& tok_;
9799 Arena& arena_;
98100 ExpressionParser<D> expr_parser_;
101+ TableRefParser<D> table_ref_parser_;
99102
100103 // ---- SELECT options ----
101104
@@ -152,7 +155,7 @@ class SelectParser {
152155 Token alias_name = tok_.next_token ();
153156 AstNode* alias = make_node (arena_, NodeType::NODE_ALIAS , alias_name.text );
154157 item->add_child (alias);
155- } else if (is_alias_start (next.type )) {
158+ } else if (TableRefParser<D>:: is_alias_start (next.type )) {
156159 // Implicit alias (no AS keyword): SELECT expr alias_name
157160 tok_.skip ();
158161 AstNode* alias = make_node (arena_, NodeType::NODE_ALIAS , next.text );
@@ -161,165 +164,6 @@ class SelectParser {
161164 return item;
162165 }
163166
164- // ---- FROM clause ----
165-
166- AstNode* parse_from_clause () {
167- AstNode* from = make_node (arena_, NodeType::NODE_FROM_CLAUSE );
168- if (!from) return nullptr ;
169-
170- // First table reference
171- AstNode* table_ref = parse_table_reference ();
172- if (table_ref) from->add_child (table_ref);
173-
174- // Additional table refs (comma join) or explicit JOINs
175- while (true ) {
176- Token t = tok_.peek ();
177- if (t.type == TokenType::TK_COMMA ) {
178- // Comma join: FROM t1, t2
179- tok_.skip ();
180- AstNode* next_ref = parse_table_reference ();
181- if (next_ref) from->add_child (next_ref);
182- } else if (is_join_start (t.type )) {
183- // Explicit JOIN
184- AstNode* join = parse_join (from->first_child );
185- if (join) {
186- // Replace the last table ref with the join node
187- // Actually, append the join as a child of FROM
188- from->add_child (join);
189- }
190- } else {
191- break ;
192- }
193- }
194-
195- return from;
196- }
197-
198- AstNode* parse_table_reference () {
199- Token t = tok_.peek ();
200-
201- // Subquery: (SELECT ...)
202- if (t.type == TokenType::TK_LPAREN ) {
203- tok_.skip ();
204- if (tok_.peek ().type == TokenType::TK_SELECT ) {
205- AstNode* subq = make_node (arena_, NodeType::NODE_SUBQUERY );
206- // Skip to matching paren
207- int depth = 1 ;
208- while (depth > 0 ) {
209- Token st = tok_.next_token ();
210- if (st.type == TokenType::TK_LPAREN ) ++depth;
211- else if (st.type == TokenType::TK_RPAREN ) --depth;
212- else if (st.type == TokenType::TK_EOF ) break ;
213- }
214- // Optional alias
215- AstNode* ref = make_node (arena_, NodeType::NODE_TABLE_REF );
216- ref->add_child (subq);
217- parse_optional_alias (ref);
218- return ref;
219- }
220- // Parenthesized table reference -- parse inner
221- AstNode* inner = parse_table_reference ();
222- if (tok_.peek ().type == TokenType::TK_RPAREN ) tok_.skip ();
223- return inner;
224- }
225-
226- // Simple table name or schema.table
227- AstNode* ref = make_node (arena_, NodeType::NODE_TABLE_REF );
228- Token name = tok_.next_token ();
229-
230- if (tok_.peek ().type == TokenType::TK_DOT ) {
231- // Qualified: schema.table
232- tok_.skip ();
233- Token table_name = tok_.next_token ();
234- AstNode* qname = make_node (arena_, NodeType::NODE_QUALIFIED_NAME );
235- qname->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER , name.text ));
236- qname->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER , table_name.text ));
237- ref->add_child (qname);
238- } else {
239- ref->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER , name.text ));
240- }
241-
242- // Optional alias
243- parse_optional_alias (ref);
244- return ref;
245- }
246-
247- void parse_optional_alias (AstNode* parent) {
248- Token t = tok_.peek ();
249- if (t.type == TokenType::TK_AS ) {
250- tok_.skip ();
251- Token alias_name = tok_.next_token ();
252- parent->add_child (make_node (arena_, NodeType::NODE_ALIAS , alias_name.text ));
253- } else if (is_alias_start (t.type )) {
254- tok_.skip ();
255- parent->add_child (make_node (arena_, NodeType::NODE_ALIAS , t.text ));
256- }
257- }
258-
259- // ---- JOIN ----
260-
261- AstNode* parse_join (AstNode* /* left_ref */ ) {
262- AstNode* join = make_node (arena_, NodeType::NODE_JOIN_CLAUSE );
263- if (!join) return nullptr ;
264-
265- // Consume join type tokens
266- Token t = tok_.peek ();
267- StringRef join_type_start = t.text ;
268- StringRef join_type_end = t.text ;
269-
270- // Optional: NATURAL, LEFT, RIGHT, FULL, INNER, OUTER, CROSS
271- while (t.type == TokenType::TK_NATURAL || t.type == TokenType::TK_LEFT ||
272- t.type == TokenType::TK_RIGHT || t.type == TokenType::TK_FULL ||
273- t.type == TokenType::TK_INNER || t.type == TokenType::TK_OUTER ||
274- t.type == TokenType::TK_CROSS ) {
275- tok_.skip ();
276- join_type_end = t.text ;
277- t = tok_.peek ();
278- }
279-
280- // Expect JOIN keyword
281- if (t.type == TokenType::TK_JOIN ) {
282- join_type_end = t.text ;
283- tok_.skip ();
284- }
285-
286- // Set join type as value (covers the span from first modifier to JOIN)
287- StringRef join_type{join_type_start.ptr ,
288- static_cast <uint32_t >((join_type_end.ptr + join_type_end.len ) - join_type_start.ptr )};
289- join->value_ptr = join_type.ptr ;
290- join->value_len = join_type.len ;
291-
292- // Right table reference
293- AstNode* right_ref = parse_table_reference ();
294- if (right_ref) join->add_child (right_ref);
295-
296- // Join condition: ON expr or USING (col_list)
297- if (tok_.peek ().type == TokenType::TK_ON ) {
298- tok_.skip ();
299- AstNode* on_expr = expr_parser_.parse ();
300- if (on_expr) join->add_child (on_expr);
301- } else if (tok_.peek ().type == TokenType::TK_USING ) {
302- tok_.skip ();
303- if (tok_.peek ().type == TokenType::TK_LPAREN ) {
304- tok_.skip ();
305- AstNode* using_list = make_node (arena_, NodeType::NODE_IDENTIFIER , StringRef{" USING" , 5 });
306- while (true ) {
307- Token col = tok_.next_token ();
308- using_list->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER , col.text ));
309- if (tok_.peek ().type == TokenType::TK_COMMA ) {
310- tok_.skip ();
311- } else {
312- break ;
313- }
314- }
315- if (tok_.peek ().type == TokenType::TK_RPAREN ) tok_.skip ();
316- join->add_child (using_list);
317- }
318- }
319-
320- return join;
321- }
322-
323167 // ---- WHERE ----
324168
325169 AstNode* parse_where_clause () {
@@ -492,52 +336,6 @@ class SelectParser {
492336
493337 return into;
494338 }
495-
496- // ---- Helpers ----
497-
498- static bool is_join_start (TokenType type) {
499- return type == TokenType::TK_JOIN || type == TokenType::TK_INNER ||
500- type == TokenType::TK_LEFT || type == TokenType::TK_RIGHT ||
501- type == TokenType::TK_FULL || type == TokenType::TK_OUTER ||
502- type == TokenType::TK_CROSS || type == TokenType::TK_NATURAL ;
503- }
504-
505- // Check if a token can start an implicit alias (identifier-like, not a clause keyword)
506- static bool is_alias_start (TokenType type) {
507- if (type == TokenType::TK_IDENTIFIER ) return true ;
508- // Some keywords are NOT valid alias starts because they start clauses
509- switch (type) {
510- case TokenType::TK_FROM :
511- case TokenType::TK_WHERE :
512- case TokenType::TK_GROUP :
513- case TokenType::TK_HAVING :
514- case TokenType::TK_ORDER :
515- case TokenType::TK_LIMIT :
516- case TokenType::TK_FOR :
517- case TokenType::TK_INTO :
518- case TokenType::TK_JOIN :
519- case TokenType::TK_INNER :
520- case TokenType::TK_LEFT :
521- case TokenType::TK_RIGHT :
522- case TokenType::TK_FULL :
523- case TokenType::TK_OUTER :
524- case TokenType::TK_CROSS :
525- case TokenType::TK_NATURAL :
526- case TokenType::TK_ON :
527- case TokenType::TK_USING :
528- case TokenType::TK_UNION :
529- case TokenType::TK_SEMICOLON :
530- case TokenType::TK_RPAREN :
531- case TokenType::TK_EOF :
532- case TokenType::TK_COMMA :
533- case TokenType::TK_SET :
534- case TokenType::TK_LOCK :
535- case TokenType::TK_UNLOCK :
536- return false ;
537- default :
538- return true ; // Keywords not in the blocklist can be implicit aliases
539- }
540- }
541339};
542340
543341} // namespace sql_parser
0 commit comments