Skip to content

Commit 5c8f2bd

Browse files
committed
Add compound query tests and fix SelectParser compound mode
Create test_compound.cpp with 22 tests covering UNION, UNION ALL, INTERSECT, INTERSECT ALL, EXCEPT, EXCEPT ALL, precedence verification, parenthesized nesting, trailing ORDER BY/LIMIT, and round-trip emission. Add compound_mode flag to SelectParser that prevents eager consumption of ORDER BY/LIMIT/FOR clauses, allowing CompoundQueryParser to claim them as compound-level clauses. Fix parse_operand to consume SELECT keyword when present for subsequent SELECTs in compound expressions.
1 parent b0ac674 commit 5c8f2bd

5 files changed

Lines changed: 412 additions & 37 deletions

File tree

Makefile.new

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ TEST_SRCS = $(TEST_DIR)/test_main.cpp \
3030
$(TEST_DIR)/test_stmt_cache.cpp \
3131
$(TEST_DIR)/test_insert.cpp \
3232
$(TEST_DIR)/test_update.cpp \
33-
$(TEST_DIR)/test_delete.cpp
33+
$(TEST_DIR)/test_delete.cpp \
34+
$(TEST_DIR)/test_compound.cpp
3435
TEST_OBJS = $(TEST_SRCS:.cpp=.o)
3536
TEST_TARGET = $(PROJECT_ROOT)/run_tests
3637

include/sql_parser/compound_query_parser.h

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,32 @@ class CompoundQueryParser {
4747
return compound;
4848
}
4949

50-
// No set operator found -- return the bare SELECT as-is
50+
// No set operator found -- return the bare SELECT as-is.
51+
// Since we used compound_mode, ORDER BY/LIMIT/FOR weren't consumed.
52+
// Parse them now and attach to the SELECT node.
53+
if (result->type == NodeType::NODE_SELECT_STMT) {
54+
if (tok_.peek().type == TokenType::TK_ORDER) {
55+
tok_.skip();
56+
if (tok_.peek().type == TokenType::TK_BY) tok_.skip();
57+
AstNode* order_by = parse_order_by();
58+
if (order_by) result->add_child(order_by);
59+
}
60+
if (tok_.peek().type == TokenType::TK_LIMIT) {
61+
tok_.skip();
62+
AstNode* limit = parse_limit();
63+
if (limit) result->add_child(limit);
64+
}
65+
// FOR UPDATE / FOR SHARE
66+
if (tok_.peek().type == TokenType::TK_FOR) {
67+
tok_.skip();
68+
AstNode* lock = make_node(arena_, NodeType::NODE_LOCKING_CLAUSE);
69+
if (lock) {
70+
Token strength = tok_.next_token();
71+
lock->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, strength.text));
72+
result->add_child(lock);
73+
}
74+
}
75+
}
5176
return result;
5277
}
5378

@@ -129,15 +154,26 @@ class CompoundQueryParser {
129154
if (tok_.peek().type == TokenType::TK_SELECT) {
130155
tok_.skip(); // consume SELECT
131156
// Create a SelectParser that will parse from after SELECT
132-
SelectParser<D> sp(tok_, arena_);
157+
SelectParser<D> sp(tok_, arena_, true);
133158
AstNode* select = sp.parse();
134159

135160
// Check if a set operator follows inside the parens
136161
if (is_set_operator(tok_.peek().type)) {
137162
// There's a compound inside the parens
138-
// We need to continue parsing with select as the left operand
139163
inner = continue_compound_from(select, 0);
140164
} else {
165+
// Single SELECT inside parens -- parse ORDER BY/LIMIT
166+
if (tok_.peek().type == TokenType::TK_ORDER) {
167+
tok_.skip();
168+
if (tok_.peek().type == TokenType::TK_BY) tok_.skip();
169+
AstNode* ob = parse_order_by();
170+
if (ob) select->add_child(ob);
171+
}
172+
if (tok_.peek().type == TokenType::TK_LIMIT) {
173+
tok_.skip();
174+
AstNode* lim = parse_limit();
175+
if (lim) select->add_child(lim);
176+
}
141177
inner = select;
142178
}
143179
} else {
@@ -155,9 +191,13 @@ class CompoundQueryParser {
155191
}
156192

157193
// Not parenthesized -- must be a plain SELECT
158-
// Note: SELECT keyword was already consumed by the classifier
159-
// The tokenizer is positioned right after SELECT
160-
SelectParser<D> sp(tok_, arena_);
194+
// Consume SELECT keyword if present (already consumed by classifier
195+
// for the first call, but present for subsequent SELECTs in compound)
196+
if (tok_.peek().type == TokenType::TK_SELECT) {
197+
tok_.skip();
198+
}
199+
// Use compound_mode=true so ORDER BY/LIMIT aren't consumed
200+
SelectParser<D> sp(tok_, arena_, true);
161201
return sp.parse();
162202
}
163203

@@ -183,7 +223,7 @@ class CompoundQueryParser {
183223
AstNode* right = nullptr;
184224
if (tok_.peek().type == TokenType::TK_SELECT) {
185225
tok_.skip();
186-
SelectParser<D> sp(tok_, arena_);
226+
SelectParser<D> sp(tok_, arena_, true);
187227
AstNode* rsel = sp.parse();
188228
// Check for more operators at higher precedence
189229
right = continue_compound_from(rsel, prec);

include/sql_parser/select_parser.h

Lines changed: 33 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,14 @@ namespace sql_parser {
1414
template <Dialect D>
1515
class SelectParser {
1616
public:
17-
SelectParser(Tokenizer<D>& tokenizer, Arena& arena)
17+
SelectParser(Tokenizer<D>& tokenizer, Arena& arena, bool compound_mode = false)
1818
: tok_(tokenizer), arena_(arena), expr_parser_(tokenizer, arena),
19-
table_ref_parser_(tokenizer, arena, expr_parser_) {}
19+
table_ref_parser_(tokenizer, arena, expr_parser_),
20+
compound_mode_(compound_mode) {}
2021

2122
// Parse a SELECT statement (SELECT keyword already consumed by classifier).
23+
// In compound_mode, stops before ORDER BY / LIMIT so they can be claimed
24+
// by the compound query parser.
2225
AstNode* parse() {
2326
AstNode* root = make_node(arena_, NodeType::NODE_SELECT_STMT);
2427
if (!root) return nullptr;
@@ -62,32 +65,36 @@ class SelectParser {
6265
if (having) root->add_child(having);
6366
}
6467

65-
// ORDER BY clause
66-
if (tok_.peek().type == TokenType::TK_ORDER) {
67-
tok_.skip();
68-
if (tok_.peek().type == TokenType::TK_BY) tok_.skip();
69-
AstNode* order_by = parse_order_by();
70-
if (order_by) root->add_child(order_by);
71-
}
68+
// In compound_mode, stop before ORDER BY / LIMIT so the compound
69+
// query parser can claim them as applying to the compound result.
70+
if (!compound_mode_) {
71+
// ORDER BY clause
72+
if (tok_.peek().type == TokenType::TK_ORDER) {
73+
tok_.skip();
74+
if (tok_.peek().type == TokenType::TK_BY) tok_.skip();
75+
AstNode* order_by = parse_order_by();
76+
if (order_by) root->add_child(order_by);
77+
}
7278

73-
// LIMIT clause
74-
if (tok_.peek().type == TokenType::TK_LIMIT) {
75-
tok_.skip();
76-
AstNode* limit = parse_limit();
77-
if (limit) root->add_child(limit);
78-
}
79+
// LIMIT clause
80+
if (tok_.peek().type == TokenType::TK_LIMIT) {
81+
tok_.skip();
82+
AstNode* limit = parse_limit();
83+
if (limit) root->add_child(limit);
84+
}
7985

80-
// FOR UPDATE / FOR SHARE (locking)
81-
if (tok_.peek().type == TokenType::TK_FOR) {
82-
AstNode* lock = parse_locking();
83-
if (lock) root->add_child(lock);
84-
}
86+
// FOR UPDATE / FOR SHARE (locking)
87+
if (tok_.peek().type == TokenType::TK_FOR) {
88+
AstNode* lock = parse_locking();
89+
if (lock) root->add_child(lock);
90+
}
8591

86-
// INTO (MySQL: can appear here too -- INTO OUTFILE/DUMPFILE/var)
87-
if constexpr (D == Dialect::MySQL) {
88-
if (tok_.peek().type == TokenType::TK_INTO) {
89-
AstNode* into = parse_into();
90-
if (into) root->add_child(into);
92+
// INTO (MySQL: can appear here too -- INTO OUTFILE/DUMPFILE/var)
93+
if constexpr (D == Dialect::MySQL) {
94+
if (tok_.peek().type == TokenType::TK_INTO) {
95+
AstNode* into = parse_into();
96+
if (into) root->add_child(into);
97+
}
9198
}
9299
}
93100

@@ -99,6 +106,7 @@ class SelectParser {
99106
Arena& arena_;
100107
ExpressionParser<D> expr_parser_;
101108
TableRefParser<D> table_ref_parser_;
109+
bool compound_mode_;
102110

103111
// ---- SELECT options ----
104112

src/sql_parser/parser.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ ParseResult Parser<D>::parse_select_from_lparen() {
114114
AstNode* inner = nullptr;
115115
if (tokenizer_.peek().type == TokenType::TK_SELECT) {
116116
tokenizer_.skip(); // consume SELECT
117-
SelectParser<D> sp(tokenizer_, arena_);
117+
SelectParser<D> sp(tokenizer_, arena_, true);
118118
inner = sp.parse();
119119

120120
// Check for set operators inside the parens
@@ -133,7 +133,7 @@ ParseResult Parser<D>::parse_select_from_lparen() {
133133
if (tokenizer_.peek().type == TokenType::TK_SELECT) {
134134
tokenizer_.skip();
135135
}
136-
SelectParser<D> sp2(tokenizer_, arena_);
136+
SelectParser<D> sp2(tokenizer_, arena_, true);
137137
AstNode* right = sp2.parse();
138138

139139
AstNode* setop = make_node(arena_, NodeType::NODE_SET_OPERATION, op_text);
@@ -187,14 +187,14 @@ ParseResult Parser<D>::parse_select_from_lparen() {
187187
if (tokenizer_.peek().type == TokenType::TK_SELECT) {
188188
tokenizer_.skip();
189189
}
190-
SelectParser<D> sp3(tokenizer_, arena_);
190+
SelectParser<D> sp3(tokenizer_, arena_, true);
191191
right = sp3.parse();
192192
if (tokenizer_.peek().type == TokenType::TK_RPAREN) {
193193
tokenizer_.skip();
194194
}
195195
} else if (tokenizer_.peek().type == TokenType::TK_SELECT) {
196196
tokenizer_.skip();
197-
SelectParser<D> sp3(tokenizer_, arena_);
197+
SelectParser<D> sp3(tokenizer_, arena_, true);
198198
right = sp3.parse();
199199
}
200200

0 commit comments

Comments
 (0)