77#include " sql_parser/ast.h"
88#include " sql_parser/arena.h"
99#include " sql_parser/expression_parser.h"
10+ #include " sql_parser/table_ref_parser.h"
1011
1112namespace sql_parser {
1213
1314template <Dialect D>
1415class SelectParser {
1516public:
1617 SelectParser (Tokenizer<D>& tokenizer, Arena& arena)
17- : tok_(tokenizer), arena_(arena), expr_parser_(tokenizer, arena) {}
18+ : tok_(tokenizer), arena_(arena), expr_parser_(tokenizer, arena),
19+ table_ref_parser_ (tokenizer, arena, expr_parser_) {}
1820
1921 // Parse a SELECT statement (SELECT keyword already consumed by classifier).
2022 AstNode* parse () {
@@ -34,7 +36,7 @@ class SelectParser {
3436 // FROM clause
3537 if (tok_.peek ().type == TokenType::TK_FROM) {
3638 tok_.skip ();
37- AstNode* from = parse_from_clause ();
39+ AstNode* from = table_ref_parser_. parse_from_clause ();
3840 if (from) root->add_child (from);
3941 }
4042
@@ -96,6 +98,7 @@ class SelectParser {
9698 Tokenizer<D>& tok_;
9799 Arena& arena_;
98100 ExpressionParser<D> expr_parser_;
101+ TableRefParser<D> table_ref_parser_;
99102
100103 // ---- SELECT options ----
101104
@@ -152,7 +155,7 @@ class SelectParser {
152155 Token alias_name = tok_.next_token ();
153156 AstNode* alias = make_node (arena_, NodeType::NODE_ALIAS, alias_name.text );
154157 item->add_child (alias);
155- } else if (is_alias_start (next.type )) {
158+ } else if (TableRefParser<D>:: is_alias_start (next.type )) {
156159 // Implicit alias (no AS keyword): SELECT expr alias_name
157160 tok_.skip ();
158161 AstNode* alias = make_node (arena_, NodeType::NODE_ALIAS, next.text );
@@ -161,165 +164,6 @@ class SelectParser {
161164 return item;
162165 }
163166
164- // ---- FROM clause ----
165-
166- AstNode* parse_from_clause () {
167- AstNode* from = make_node (arena_, NodeType::NODE_FROM_CLAUSE);
168- if (!from) return nullptr ;
169-
170- // First table reference
171- AstNode* table_ref = parse_table_reference ();
172- if (table_ref) from->add_child (table_ref);
173-
174- // Additional table refs (comma join) or explicit JOINs
175- while (true ) {
176- Token t = tok_.peek ();
177- if (t.type == TokenType::TK_COMMA) {
178- // Comma join: FROM t1, t2
179- tok_.skip ();
180- AstNode* next_ref = parse_table_reference ();
181- if (next_ref) from->add_child (next_ref);
182- } else if (is_join_start (t.type )) {
183- // Explicit JOIN
184- AstNode* join = parse_join (from->first_child );
185- if (join) {
186- // Replace the last table ref with the join node
187- // Actually, append the join as a child of FROM
188- from->add_child (join);
189- }
190- } else {
191- break ;
192- }
193- }
194-
195- return from;
196- }
197-
198- AstNode* parse_table_reference () {
199- Token t = tok_.peek ();
200-
201- // Subquery: (SELECT ...)
202- if (t.type == TokenType::TK_LPAREN) {
203- tok_.skip ();
204- if (tok_.peek ().type == TokenType::TK_SELECT) {
205- AstNode* subq = make_node (arena_, NodeType::NODE_SUBQUERY);
206- // Skip to matching paren
207- int depth = 1 ;
208- while (depth > 0 ) {
209- Token st = tok_.next_token ();
210- if (st.type == TokenType::TK_LPAREN) ++depth;
211- else if (st.type == TokenType::TK_RPAREN) --depth;
212- else if (st.type == TokenType::TK_EOF) break ;
213- }
214- // Optional alias
215- AstNode* ref = make_node (arena_, NodeType::NODE_TABLE_REF);
216- ref->add_child (subq);
217- parse_optional_alias (ref);
218- return ref;
219- }
220- // Parenthesized table reference -- parse inner
221- AstNode* inner = parse_table_reference ();
222- if (tok_.peek ().type == TokenType::TK_RPAREN) tok_.skip ();
223- return inner;
224- }
225-
226- // Simple table name or schema.table
227- AstNode* ref = make_node (arena_, NodeType::NODE_TABLE_REF);
228- Token name = tok_.next_token ();
229-
230- if (tok_.peek ().type == TokenType::TK_DOT) {
231- // Qualified: schema.table
232- tok_.skip ();
233- Token table_name = tok_.next_token ();
234- AstNode* qname = make_node (arena_, NodeType::NODE_QUALIFIED_NAME);
235- qname->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER, name.text ));
236- qname->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER, table_name.text ));
237- ref->add_child (qname);
238- } else {
239- ref->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER, name.text ));
240- }
241-
242- // Optional alias
243- parse_optional_alias (ref);
244- return ref;
245- }
246-
247- void parse_optional_alias (AstNode* parent) {
248- Token t = tok_.peek ();
249- if (t.type == TokenType::TK_AS) {
250- tok_.skip ();
251- Token alias_name = tok_.next_token ();
252- parent->add_child (make_node (arena_, NodeType::NODE_ALIAS, alias_name.text ));
253- } else if (is_alias_start (t.type )) {
254- tok_.skip ();
255- parent->add_child (make_node (arena_, NodeType::NODE_ALIAS, t.text ));
256- }
257- }
258-
259- // ---- JOIN ----
260-
261- AstNode* parse_join (AstNode* /* left_ref */ ) {
262- AstNode* join = make_node (arena_, NodeType::NODE_JOIN_CLAUSE);
263- if (!join) return nullptr ;
264-
265- // Consume join type tokens
266- Token t = tok_.peek ();
267- StringRef join_type_start = t.text ;
268- StringRef join_type_end = t.text ;
269-
270- // Optional: NATURAL, LEFT, RIGHT, FULL, INNER, OUTER, CROSS
271- while (t.type == TokenType::TK_NATURAL || t.type == TokenType::TK_LEFT ||
272- t.type == TokenType::TK_RIGHT || t.type == TokenType::TK_FULL ||
273- t.type == TokenType::TK_INNER || t.type == TokenType::TK_OUTER ||
274- t.type == TokenType::TK_CROSS) {
275- tok_.skip ();
276- join_type_end = t.text ;
277- t = tok_.peek ();
278- }
279-
280- // Expect JOIN keyword
281- if (t.type == TokenType::TK_JOIN) {
282- join_type_end = t.text ;
283- tok_.skip ();
284- }
285-
286- // Set join type as value (covers the span from first modifier to JOIN)
287- StringRef join_type{join_type_start.ptr ,
288- static_cast <uint32_t >((join_type_end.ptr + join_type_end.len ) - join_type_start.ptr )};
289- join->value_ptr = join_type.ptr ;
290- join->value_len = join_type.len ;
291-
292- // Right table reference
293- AstNode* right_ref = parse_table_reference ();
294- if (right_ref) join->add_child (right_ref);
295-
296- // Join condition: ON expr or USING (col_list)
297- if (tok_.peek ().type == TokenType::TK_ON) {
298- tok_.skip ();
299- AstNode* on_expr = expr_parser_.parse ();
300- if (on_expr) join->add_child (on_expr);
301- } else if (tok_.peek ().type == TokenType::TK_USING) {
302- tok_.skip ();
303- if (tok_.peek ().type == TokenType::TK_LPAREN) {
304- tok_.skip ();
305- AstNode* using_list = make_node (arena_, NodeType::NODE_IDENTIFIER, StringRef{" USING" , 5 });
306- while (true ) {
307- Token col = tok_.next_token ();
308- using_list->add_child (make_node (arena_, NodeType::NODE_IDENTIFIER, col.text ));
309- if (tok_.peek ().type == TokenType::TK_COMMA) {
310- tok_.skip ();
311- } else {
312- break ;
313- }
314- }
315- if (tok_.peek ().type == TokenType::TK_RPAREN) tok_.skip ();
316- join->add_child (using_list);
317- }
318- }
319-
320- return join;
321- }
322-
323167 // ---- WHERE ----
324168
325169 AstNode* parse_where_clause () {
@@ -492,52 +336,6 @@ class SelectParser {
492336
493337 return into;
494338 }
495-
496- // ---- Helpers ----
497-
498- static bool is_join_start (TokenType type) {
499- return type == TokenType::TK_JOIN || type == TokenType::TK_INNER ||
500- type == TokenType::TK_LEFT || type == TokenType::TK_RIGHT ||
501- type == TokenType::TK_FULL || type == TokenType::TK_OUTER ||
502- type == TokenType::TK_CROSS || type == TokenType::TK_NATURAL;
503- }
504-
505- // Check if a token can start an implicit alias (identifier-like, not a clause keyword)
506- static bool is_alias_start (TokenType type) {
507- if (type == TokenType::TK_IDENTIFIER) return true ;
508- // Some keywords are NOT valid alias starts because they start clauses
509- switch (type) {
510- case TokenType::TK_FROM:
511- case TokenType::TK_WHERE:
512- case TokenType::TK_GROUP:
513- case TokenType::TK_HAVING:
514- case TokenType::TK_ORDER:
515- case TokenType::TK_LIMIT:
516- case TokenType::TK_FOR:
517- case TokenType::TK_INTO:
518- case TokenType::TK_JOIN:
519- case TokenType::TK_INNER:
520- case TokenType::TK_LEFT:
521- case TokenType::TK_RIGHT:
522- case TokenType::TK_FULL:
523- case TokenType::TK_OUTER:
524- case TokenType::TK_CROSS:
525- case TokenType::TK_NATURAL:
526- case TokenType::TK_ON:
527- case TokenType::TK_USING:
528- case TokenType::TK_UNION:
529- case TokenType::TK_SEMICOLON:
530- case TokenType::TK_RPAREN:
531- case TokenType::TK_EOF:
532- case TokenType::TK_COMMA:
533- case TokenType::TK_SET:
534- case TokenType::TK_LOCK:
535- case TokenType::TK_UNLOCK:
536- return false ;
537- default :
538- return true ; // Keywords not in the blocklist can be implicit aliases
539- }
540- }
541339};
542340
543341} // namespace sql_parser
0 commit comments