Skip to content

Commit b66f241

Browse files
committed
Extract TableRefParser from SelectParser for shared table ref parsing
Moves parse_from_clause(), parse_table_reference(), parse_join(), parse_optional_alias(), is_join_start(), and is_alias_start() into a standalone TableRefParser<D> utility class. SelectParser delegates to TableRefParser internally. All 236 existing tests pass unchanged.
1 parent c830060 commit b66f241

2 files changed

Lines changed: 239 additions & 208 deletions

File tree

include/sql_parser/select_parser.h

Lines changed: 6 additions & 208 deletions
Original file line number | Diff line number | Diff line change
@@ -7,14 +7,16 @@
77
#include "sql_parser/ast.h"
88
#include "sql_parser/arena.h"
99
#include "sql_parser/expression_parser.h"
10+
#include "sql_parser/table_ref_parser.h"
1011

1112
namespace sql_parser {
1213

1314
template <Dialect D>
1415
class SelectParser {
1516
public:
1617
SelectParser(Tokenizer<D>& tokenizer, Arena& arena)
17-
: tok_(tokenizer), arena_(arena), expr_parser_(tokenizer, arena) {}
18+
: tok_(tokenizer), arena_(arena), expr_parser_(tokenizer, arena),
19+
table_ref_parser_(tokenizer, arena, expr_parser_) {}
1820

1921
// Parse a SELECT statement (SELECT keyword already consumed by classifier).
2022
AstNode* parse() {
@@ -34,7 +36,7 @@ class SelectParser {
3436
// FROM clause
3537
if (tok_.peek().type == TokenType::TK_FROM) {
3638
tok_.skip();
37-
AstNode* from = parse_from_clause();
39+
AstNode* from = table_ref_parser_.parse_from_clause();
3840
if (from) root->add_child(from);
3941
}
4042

@@ -96,6 +98,7 @@ class SelectParser {
9698
Tokenizer<D>& tok_;
9799
Arena& arena_;
98100
ExpressionParser<D> expr_parser_;
101+
TableRefParser<D> table_ref_parser_;
99102

100103
// ---- SELECT options ----
101104

@@ -152,7 +155,7 @@ class SelectParser {
152155
Token alias_name = tok_.next_token();
153156
AstNode* alias = make_node(arena_, NodeType::NODE_ALIAS, alias_name.text);
154157
item->add_child(alias);
155-
} else if (is_alias_start(next.type)) {
158+
} else if (TableRefParser<D>::is_alias_start(next.type)) {
156159
// Implicit alias (no AS keyword): SELECT expr alias_name
157160
tok_.skip();
158161
AstNode* alias = make_node(arena_, NodeType::NODE_ALIAS, next.text);
@@ -161,165 +164,6 @@ class SelectParser {
161164
return item;
162165
}
163166

164-
// ---- FROM clause ----
165-
166-
AstNode* parse_from_clause() {
167-
AstNode* from = make_node(arena_, NodeType::NODE_FROM_CLAUSE);
168-
if (!from) return nullptr;
169-
170-
// First table reference
171-
AstNode* table_ref = parse_table_reference();
172-
if (table_ref) from->add_child(table_ref);
173-
174-
// Additional table refs (comma join) or explicit JOINs
175-
while (true) {
176-
Token t = tok_.peek();
177-
if (t.type == TokenType::TK_COMMA) {
178-
// Comma join: FROM t1, t2
179-
tok_.skip();
180-
AstNode* next_ref = parse_table_reference();
181-
if (next_ref) from->add_child(next_ref);
182-
} else if (is_join_start(t.type)) {
183-
// Explicit JOIN
184-
AstNode* join = parse_join(from->first_child);
185-
if (join) {
186-
// Replace the last table ref with the join node
187-
// Actually, append the join as a child of FROM
188-
from->add_child(join);
189-
}
190-
} else {
191-
break;
192-
}
193-
}
194-
195-
return from;
196-
}
197-
198-
AstNode* parse_table_reference() {
199-
Token t = tok_.peek();
200-
201-
// Subquery: (SELECT ...)
202-
if (t.type == TokenType::TK_LPAREN) {
203-
tok_.skip();
204-
if (tok_.peek().type == TokenType::TK_SELECT) {
205-
AstNode* subq = make_node(arena_, NodeType::NODE_SUBQUERY);
206-
// Skip to matching paren
207-
int depth = 1;
208-
while (depth > 0) {
209-
Token st = tok_.next_token();
210-
if (st.type == TokenType::TK_LPAREN) ++depth;
211-
else if (st.type == TokenType::TK_RPAREN) --depth;
212-
else if (st.type == TokenType::TK_EOF) break;
213-
}
214-
// Optional alias
215-
AstNode* ref = make_node(arena_, NodeType::NODE_TABLE_REF);
216-
ref->add_child(subq);
217-
parse_optional_alias(ref);
218-
return ref;
219-
}
220-
// Parenthesized table reference -- parse inner
221-
AstNode* inner = parse_table_reference();
222-
if (tok_.peek().type == TokenType::TK_RPAREN) tok_.skip();
223-
return inner;
224-
}
225-
226-
// Simple table name or schema.table
227-
AstNode* ref = make_node(arena_, NodeType::NODE_TABLE_REF);
228-
Token name = tok_.next_token();
229-
230-
if (tok_.peek().type == TokenType::TK_DOT) {
231-
// Qualified: schema.table
232-
tok_.skip();
233-
Token table_name = tok_.next_token();
234-
AstNode* qname = make_node(arena_, NodeType::NODE_QUALIFIED_NAME);
235-
qname->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, name.text));
236-
qname->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, table_name.text));
237-
ref->add_child(qname);
238-
} else {
239-
ref->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, name.text));
240-
}
241-
242-
// Optional alias
243-
parse_optional_alias(ref);
244-
return ref;
245-
}
246-
247-
void parse_optional_alias(AstNode* parent) {
248-
Token t = tok_.peek();
249-
if (t.type == TokenType::TK_AS) {
250-
tok_.skip();
251-
Token alias_name = tok_.next_token();
252-
parent->add_child(make_node(arena_, NodeType::NODE_ALIAS, alias_name.text));
253-
} else if (is_alias_start(t.type)) {
254-
tok_.skip();
255-
parent->add_child(make_node(arena_, NodeType::NODE_ALIAS, t.text));
256-
}
257-
}
258-
259-
// ---- JOIN ----
260-
261-
AstNode* parse_join(AstNode* /* left_ref */) {
262-
AstNode* join = make_node(arena_, NodeType::NODE_JOIN_CLAUSE);
263-
if (!join) return nullptr;
264-
265-
// Consume join type tokens
266-
Token t = tok_.peek();
267-
StringRef join_type_start = t.text;
268-
StringRef join_type_end = t.text;
269-
270-
// Optional: NATURAL, LEFT, RIGHT, FULL, INNER, OUTER, CROSS
271-
while (t.type == TokenType::TK_NATURAL || t.type == TokenType::TK_LEFT ||
272-
t.type == TokenType::TK_RIGHT || t.type == TokenType::TK_FULL ||
273-
t.type == TokenType::TK_INNER || t.type == TokenType::TK_OUTER ||
274-
t.type == TokenType::TK_CROSS) {
275-
tok_.skip();
276-
join_type_end = t.text;
277-
t = tok_.peek();
278-
}
279-
280-
// Expect JOIN keyword
281-
if (t.type == TokenType::TK_JOIN) {
282-
join_type_end = t.text;
283-
tok_.skip();
284-
}
285-
286-
// Set join type as value (covers the span from first modifier to JOIN)
287-
StringRef join_type{join_type_start.ptr,
288-
static_cast<uint32_t>((join_type_end.ptr + join_type_end.len) - join_type_start.ptr)};
289-
join->value_ptr = join_type.ptr;
290-
join->value_len = join_type.len;
291-
292-
// Right table reference
293-
AstNode* right_ref = parse_table_reference();
294-
if (right_ref) join->add_child(right_ref);
295-
296-
// Join condition: ON expr or USING (col_list)
297-
if (tok_.peek().type == TokenType::TK_ON) {
298-
tok_.skip();
299-
AstNode* on_expr = expr_parser_.parse();
300-
if (on_expr) join->add_child(on_expr);
301-
} else if (tok_.peek().type == TokenType::TK_USING) {
302-
tok_.skip();
303-
if (tok_.peek().type == TokenType::TK_LPAREN) {
304-
tok_.skip();
305-
AstNode* using_list = make_node(arena_, NodeType::NODE_IDENTIFIER, StringRef{"USING", 5});
306-
while (true) {
307-
Token col = tok_.next_token();
308-
using_list->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, col.text));
309-
if (tok_.peek().type == TokenType::TK_COMMA) {
310-
tok_.skip();
311-
} else {
312-
break;
313-
}
314-
}
315-
if (tok_.peek().type == TokenType::TK_RPAREN) tok_.skip();
316-
join->add_child(using_list);
317-
}
318-
}
319-
320-
return join;
321-
}
322-
323167
// ---- WHERE ----
324168

325169
AstNode* parse_where_clause() {
@@ -492,52 +336,6 @@ class SelectParser {
492336

493337
return into;
494338
}
495-
496-
// ---- Helpers ----
497-
498-
static bool is_join_start(TokenType type) {
499-
return type == TokenType::TK_JOIN || type == TokenType::TK_INNER ||
500-
type == TokenType::TK_LEFT || type == TokenType::TK_RIGHT ||
501-
type == TokenType::TK_FULL || type == TokenType::TK_OUTER ||
502-
type == TokenType::TK_CROSS || type == TokenType::TK_NATURAL;
503-
}
504-
505-
// Check if a token can start an implicit alias (identifier-like, not a clause keyword)
506-
static bool is_alias_start(TokenType type) {
507-
if (type == TokenType::TK_IDENTIFIER) return true;
508-
// Some keywords are NOT valid alias starts because they start clauses
509-
switch (type) {
510-
case TokenType::TK_FROM:
511-
case TokenType::TK_WHERE:
512-
case TokenType::TK_GROUP:
513-
case TokenType::TK_HAVING:
514-
case TokenType::TK_ORDER:
515-
case TokenType::TK_LIMIT:
516-
case TokenType::TK_FOR:
517-
case TokenType::TK_INTO:
518-
case TokenType::TK_JOIN:
519-
case TokenType::TK_INNER:
520-
case TokenType::TK_LEFT:
521-
case TokenType::TK_RIGHT:
522-
case TokenType::TK_FULL:
523-
case TokenType::TK_OUTER:
524-
case TokenType::TK_CROSS:
525-
case TokenType::TK_NATURAL:
526-
case TokenType::TK_ON:
527-
case TokenType::TK_USING:
528-
case TokenType::TK_UNION:
529-
case TokenType::TK_SEMICOLON:
530-
case TokenType::TK_RPAREN:
531-
case TokenType::TK_EOF:
532-
case TokenType::TK_COMMA:
533-
case TokenType::TK_SET:
534-
case TokenType::TK_LOCK:
535-
case TokenType::TK_UNLOCK:
536-
return false;
537-
default:
538-
return true; // Keywords not in the blocklist can be implicit aliases
539-
}
540-
}
541339
};
542340

543341
} // namespace sql_parser

0 commit comments

Comments
 (0)