Skip to content

Commit b0ac674

Browse files
committed
Integrate CompoundQueryParser into parser and add emitter support
Update parse_select() to use CompoundQueryParser instead of SelectParser directly. Add emit_compound_query and emit_set_operation to emitter. Handle parenthesized SELECT at classifier level for queries starting with '('. Move FLAG_SET_OP_ALL to common.h for shared access.
1 parent b7ecc1b commit b0ac674

5 files changed

Lines changed: 226 additions & 5 deletions

File tree

include/sql_parser/common.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ inline int ci_cmp(const char* a, uint32_t alen, const char* b, uint8_t blen) {
5656
return 0;
5757
}
5858

59+
// -- Flags for NODE_SET_OPERATION --
60+
// Bit stored in AstNode::flags of a NODE_SET_OPERATION node when the set
// operator was followed by the ALL keyword (e.g. UNION ALL). The parser sets
// it and the emitter appends " ALL" after the operator text when it is present.
static constexpr uint16_t FLAG_SET_OP_ALL = 0x01;
61+
5962
// -- Statement type (always set, even for PARTIAL/ERROR) --
6063

6164
enum class StmtType : uint8_t {

include/sql_parser/compound_query_parser.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@
1111

1212
namespace sql_parser {
1313

14-
// Flag on NODE_SET_OPERATION to indicate ALL
15-
static constexpr uint16_t FLAG_SET_OP_ALL = 0x01;
16-
1714
template <Dialect D>
1815
class CompoundQueryParser {
1916
public:

include/sql_parser/emitter.h

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ class Emitter {
6868
case NodeType::NODE_STMT_OPTIONS: emit_stmt_options(node); break;
6969
case NodeType::NODE_UPDATE_SET_ITEM: emit_update_set_item(node); break;
7070

71+
// ---- Compound query ----
72+
case NodeType::NODE_COMPOUND_QUERY: emit_compound_query(node); break;
73+
case NodeType::NODE_SET_OPERATION: emit_set_operation(node); break;
74+
7175
// ---- DELETE statement ----
7276
case NodeType::NODE_DELETE_STMT: emit_delete_stmt(node); break;
7377
case NodeType::NODE_DELETE_USING_CLAUSE: emit_delete_using(node); break;
@@ -803,6 +807,36 @@ class Emitter {
803807
}
804808
}
805809

810+
// ---- Compound query ----
811+
812+
void emit_compound_query(const AstNode* node) {
813+
for (const AstNode* child = node->first_child; child; child = child->next_sibling) {
814+
if (child->type == NodeType::NODE_SET_OPERATION) {
815+
emit_set_operation(child);
816+
} else {
817+
// Trailing ORDER BY or LIMIT
818+
emit_node(child);
819+
}
820+
}
821+
}
822+
823+
void emit_set_operation(const AstNode* node) {
824+
const AstNode* left = node->first_child;
825+
const AstNode* right = left ? left->next_sibling : nullptr;
826+
827+
if (left) emit_node(left);
828+
829+
// Emit the operator: " UNION ", " UNION ALL ", " INTERSECT ", etc.
830+
sb_.append_char(' ');
831+
emit_value(node); // operator keyword text (UNION, INTERSECT, EXCEPT)
832+
if (node->flags & FLAG_SET_OP_ALL) {
833+
sb_.append(" ALL");
834+
}
835+
sb_.append_char(' ');
836+
837+
if (right) emit_node(right);
838+
}
839+
806840
// ---- Expressions ----
807841

808842
void emit_binary_op(const AstNode* node) {

include/sql_parser/parser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class Parser {
5252

5353
// Tier 1 parsers
5454
ParseResult parse_select();
55+
ParseResult parse_select_from_lparen();
5556
ParseResult parse_set();
5657
ParseResult parse_insert(bool is_replace = false);
5758
ParseResult parse_update();

src/sql_parser/parser.cpp

Lines changed: 188 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include "sql_parser/expression_parser.h"
33
#include "sql_parser/set_parser.h"
44
#include "sql_parser/select_parser.h"
5+
#include "sql_parser/compound_query_parser.h"
56
#include "sql_parser/insert_parser.h"
67
#include "sql_parser/update_parser.h"
78
#include "sql_parser/delete_parser.h"
@@ -38,6 +39,14 @@ ParseResult Parser<D>::classify_and_dispatch() {
3839

3940
switch (first.type) {
4041
case TokenType::TK_SELECT: return parse_select();
42+
case TokenType::TK_LPAREN: {
43+
// Parenthesized SELECT / compound query: (SELECT ...) UNION ...
44+
Token next = tokenizer_.peek();
45+
if (next.type == TokenType::TK_SELECT || next.type == TokenType::TK_LPAREN) {
46+
return parse_select_from_lparen();
47+
}
48+
return extract_unknown(first);
49+
}
4150
case TokenType::TK_SET: return parse_set();
4251
case TokenType::TK_INSERT: return parse_insert(false);
4352
case TokenType::TK_UPDATE: return parse_update();
@@ -74,8 +83,8 @@ ParseResult Parser<D>::parse_select() {
7483
ParseResult r;
7584
r.stmt_type = StmtType::SELECT;
7685

77-
SelectParser<D> select_parser(tokenizer_, arena_);
78-
AstNode* ast = select_parser.parse();
86+
CompoundQueryParser<D> compound_parser(tokenizer_, arena_);
87+
AstNode* ast = compound_parser.parse();
7988

8089
if (ast) {
8190
r.status = ParseResult::OK;
@@ -88,6 +97,183 @@ ParseResult Parser<D>::parse_select() {
8897
return r;
8998
}
9099

100+
template <Dialect D>
101+
ParseResult Parser<D>::parse_select_from_lparen() {
102+
// Called when classifier consumed '(' and peeked SELECT or '('
103+
// We need to parse the inner compound query, then check for set operators
104+
// after the closing ')'.
105+
//
106+
// Strategy: parse inner as a fresh compound expression, expect ')',
107+
// then check if a set operator follows (making this a compound query).
108+
109+
ParseResult r;
110+
r.stmt_type = StmtType::SELECT;
111+
112+
// We're inside '(' already consumed.
113+
// Parse inner: could be SELECT or another '('
114+
AstNode* inner = nullptr;
115+
if (tokenizer_.peek().type == TokenType::TK_SELECT) {
116+
tokenizer_.skip(); // consume SELECT
117+
SelectParser<D> sp(tokenizer_, arena_);
118+
inner = sp.parse();
119+
120+
// Check for set operators inside the parens
121+
Token t = tokenizer_.peek();
122+
while (t.type == TokenType::TK_UNION ||
123+
t.type == TokenType::TK_INTERSECT ||
124+
t.type == TokenType::TK_EXCEPT) {
125+
tokenizer_.skip();
126+
StringRef op_text = t.text;
127+
uint16_t flags = 0;
128+
if (tokenizer_.peek().type == TokenType::TK_ALL) {
129+
tokenizer_.skip();
130+
flags = FLAG_SET_OP_ALL;
131+
}
132+
// Next SELECT
133+
if (tokenizer_.peek().type == TokenType::TK_SELECT) {
134+
tokenizer_.skip();
135+
}
136+
SelectParser<D> sp2(tokenizer_, arena_);
137+
AstNode* right = sp2.parse();
138+
139+
AstNode* setop = make_node(arena_, NodeType::NODE_SET_OPERATION, op_text);
140+
if (setop) {
141+
setop->flags = flags;
142+
setop->add_child(inner);
143+
if (right) setop->add_child(right);
144+
inner = setop;
145+
}
146+
t = tokenizer_.peek();
147+
}
148+
} else {
149+
// Nested parenthesized -- recursively handle
150+
// This is an edge case; for now parse as compound
151+
CompoundQueryParser<D> cp(tokenizer_, arena_);
152+
inner = cp.parse();
153+
}
154+
155+
// Expect closing ')'
156+
if (tokenizer_.peek().type == TokenType::TK_RPAREN) {
157+
tokenizer_.skip();
158+
}
159+
160+
// Now check if a set operator follows after the ')'
161+
Token t = tokenizer_.peek();
162+
if (t.type == TokenType::TK_UNION ||
163+
t.type == TokenType::TK_INTERSECT ||
164+
t.type == TokenType::TK_EXCEPT) {
165+
// This is a compound query starting with a parenthesized operand.
166+
// Use CompoundQueryParser to continue, but we already have the left operand.
167+
// We'll build the compound manually.
168+
AstNode* left = inner;
169+
while (true) {
170+
t = tokenizer_.peek();
171+
if (t.type != TokenType::TK_UNION &&
172+
t.type != TokenType::TK_INTERSECT &&
173+
t.type != TokenType::TK_EXCEPT) break;
174+
175+
tokenizer_.skip();
176+
StringRef op_text = t.text;
177+
uint16_t flags = 0;
178+
if (tokenizer_.peek().type == TokenType::TK_ALL) {
179+
tokenizer_.skip();
180+
flags = FLAG_SET_OP_ALL;
181+
}
182+
183+
AstNode* right = nullptr;
184+
if (tokenizer_.peek().type == TokenType::TK_LPAREN) {
185+
// Parenthesized right operand
186+
tokenizer_.skip();
187+
if (tokenizer_.peek().type == TokenType::TK_SELECT) {
188+
tokenizer_.skip();
189+
}
190+
SelectParser<D> sp3(tokenizer_, arena_);
191+
right = sp3.parse();
192+
if (tokenizer_.peek().type == TokenType::TK_RPAREN) {
193+
tokenizer_.skip();
194+
}
195+
} else if (tokenizer_.peek().type == TokenType::TK_SELECT) {
196+
tokenizer_.skip();
197+
SelectParser<D> sp3(tokenizer_, arena_);
198+
right = sp3.parse();
199+
}
200+
201+
AstNode* setop = make_node(arena_, NodeType::NODE_SET_OPERATION, op_text);
202+
if (setop) {
203+
setop->flags = flags;
204+
setop->add_child(left);
205+
if (right) setop->add_child(right);
206+
left = setop;
207+
}
208+
}
209+
210+
// Wrap in COMPOUND_QUERY
211+
AstNode* compound = make_node(arena_, NodeType::NODE_COMPOUND_QUERY);
212+
if (compound) {
213+
compound->add_child(left);
214+
215+
// Trailing ORDER BY
216+
if (tokenizer_.peek().type == TokenType::TK_ORDER) {
217+
tokenizer_.skip();
218+
if (tokenizer_.peek().type == TokenType::TK_BY) tokenizer_.skip();
219+
ExpressionParser<D> ep(tokenizer_, arena_);
220+
AstNode* order_by = make_node(arena_, NodeType::NODE_ORDER_BY_CLAUSE);
221+
if (order_by) {
222+
while (true) {
223+
AstNode* expr = ep.parse();
224+
if (!expr) break;
225+
AstNode* item = make_node(arena_, NodeType::NODE_ORDER_BY_ITEM);
226+
item->add_child(expr);
227+
Token dir = tokenizer_.peek();
228+
if (dir.type == TokenType::TK_ASC || dir.type == TokenType::TK_DESC) {
229+
tokenizer_.skip();
230+
item->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, dir.text));
231+
}
232+
order_by->add_child(item);
233+
if (tokenizer_.peek().type == TokenType::TK_COMMA) {
234+
tokenizer_.skip();
235+
} else {
236+
break;
237+
}
238+
}
239+
compound->add_child(order_by);
240+
}
241+
}
242+
243+
// Trailing LIMIT
244+
if (tokenizer_.peek().type == TokenType::TK_LIMIT) {
245+
tokenizer_.skip();
246+
ExpressionParser<D> ep(tokenizer_, arena_);
247+
AstNode* limit = make_node(arena_, NodeType::NODE_LIMIT_CLAUSE);
248+
if (limit) {
249+
AstNode* val = ep.parse();
250+
if (val) limit->add_child(val);
251+
if (tokenizer_.peek().type == TokenType::TK_OFFSET) {
252+
tokenizer_.skip();
253+
AstNode* off = ep.parse();
254+
if (off) limit->add_child(off);
255+
}
256+
compound->add_child(limit);
257+
}
258+
}
259+
260+
r.status = ParseResult::OK;
261+
r.ast = compound;
262+
}
263+
} else {
264+
// Just a parenthesized SELECT, no compound
265+
if (inner) {
266+
r.status = ParseResult::OK;
267+
r.ast = inner;
268+
} else {
269+
r.status = ParseResult::PARTIAL;
270+
}
271+
}
272+
273+
scan_to_end(r);
274+
return r;
275+
}
276+
91277
template <Dialect D>
92278
ParseResult Parser<D>::parse_set() {
93279
ParseResult r;

0 commit comments

Comments
 (0)