Skip to content

Commit b63b194

Browse files
committed
feat: add tuple, ARRAY constructor, array subscript, and field access expressions
- NODE_TUPLE: ROW(1, 2, 3) row constructors
- NODE_ARRAY_CONSTRUCTOR: ARRAY['a', 'b'] with bracket syntax
- NODE_ARRAY_SUBSCRIPT: expr[index] postfix subscript
- NODE_FIELD_ACCESS: (expr).field postfix access
- New tokens: TK_LBRACKET, TK_RBRACKET, TK_ARRAY, TK_ROW
- Emitter support for all new node types

Partial fix for #19. A bare tuple (1, 2, 3) without the ROW keyword remains ambiguous with comma-separated select items in SELECT context.
1 parent 5f671c1 commit b63b194

7 files changed

Lines changed: 154 additions & 3 deletions

File tree

include/sql_parser/common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,10 @@ enum class NodeType : uint16_t {
150150
NODE_BETWEEN,
151151
NODE_IN_LIST,
152152
NODE_CASE_WHEN,
153+
NODE_TUPLE, // (expr, expr, ...) row constructor
154+
NODE_ARRAY_CONSTRUCTOR, // ARRAY[val, val, ...]
155+
NODE_ARRAY_SUBSCRIPT, // expr[index]
156+
NODE_FIELD_ACCESS, // (expr).field postfix access
153157

154158
// INSERT nodes
155159
NODE_INSERT_STMT,

include/sql_parser/emitter.h

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ class Emitter {
113113
case NodeType::NODE_BETWEEN: emit_between(node); break;
114114
case NodeType::NODE_IN_LIST: emit_in_list(node); break;
115115
case NodeType::NODE_CASE_WHEN: emit_case_when(node); break;
116+
case NodeType::NODE_TUPLE: emit_tuple(node); break;
117+
case NodeType::NODE_ARRAY_CONSTRUCTOR: emit_array_constructor(node); break;
118+
case NodeType::NODE_ARRAY_SUBSCRIPT: emit_array_subscript(node); break;
119+
case NodeType::NODE_FIELD_ACCESS: emit_field_access(node); break;
116120
case NodeType::NODE_SUBQUERY: emit_value(node); break;
117121

118122
// ---- Leaf nodes (emit value directly) ----
@@ -1116,6 +1120,50 @@ class Emitter {
11161120
}
11171121
sb_.append("END");
11181122
}
1123+
1124+
void emit_tuple(const AstNode* node) {
1125+
// ROW keyword prefix if present
1126+
if (node->value_len > 0) {
1127+
emit_value(node);
1128+
}
1129+
sb_.append_char('(');
1130+
bool first = true;
1131+
for (const AstNode* child = node->first_child; child; child = child->next_sibling) {
1132+
if (!first) sb_.append(", ");
1133+
first = false;
1134+
emit_node(child);
1135+
}
1136+
sb_.append_char(')');
1137+
}
1138+
1139+
void emit_array_constructor(const AstNode* node) {
1140+
sb_.append("ARRAY[");
1141+
bool first = true;
1142+
for (const AstNode* child = node->first_child; child; child = child->next_sibling) {
1143+
if (!first) sb_.append(", ");
1144+
first = false;
1145+
emit_node(child);
1146+
}
1147+
sb_.append_char(']');
1148+
}
1149+
1150+
void emit_array_subscript(const AstNode* node) {
1151+
const AstNode* expr = node->first_child;
1152+
const AstNode* index = expr ? expr->next_sibling : nullptr;
1153+
if (expr) emit_node(expr);
1154+
sb_.append_char('[');
1155+
if (index) emit_node(index);
1156+
sb_.append_char(']');
1157+
}
1158+
1159+
void emit_field_access(const AstNode* node) {
1160+
const AstNode* expr = node->first_child;
1161+
const AstNode* field = expr ? expr->next_sibling : nullptr;
1162+
sb_.append_char('(');
1163+
if (expr) emit_node(expr);
1164+
sb_.append(").");
1165+
if (field) emit_node(field);
1166+
}
11191167
};
11201168

11211169
} // namespace sql_parser

include/sql_parser/expression_parser.h

Lines changed: 92 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,29 @@ class ExpressionParser {
152152
}
153153
return node;
154154
}
155+
case TokenType::TK_ARRAY: {
156+
tok_.skip();
157+
return parse_array_constructor();
158+
}
159+
case TokenType::TK_ROW: {
160+
// ROW(expr, expr, ...) — explicit row constructor
161+
tok_.skip();
162+
if (tok_.peek().type == TokenType::TK_LPAREN) {
163+
tok_.skip();
164+
AstNode* tuple = make_node(arena_, NodeType::NODE_TUPLE, t.text);
165+
if (tok_.peek().type != TokenType::TK_RPAREN) {
166+
while (true) {
167+
AstNode* elem = parse();
168+
if (elem) tuple->add_child(elem);
169+
if (tok_.peek().type == TokenType::TK_COMMA) tok_.skip();
170+
else break;
171+
}
172+
}
173+
if (tok_.peek().type == TokenType::TK_RPAREN) tok_.skip();
174+
return parse_postfix(tuple);
175+
}
176+
return make_node(arena_, NodeType::NODE_IDENTIFIER, t.text);
177+
}
155178
case TokenType::TK_CASE: {
156179
tok_.skip();
157180
return parse_case();
@@ -160,16 +183,34 @@ class ExpressionParser {
160183
tok_.skip();
161184
// Could be subquery: (SELECT ...)
162185
if (tok_.peek().type == TokenType::TK_SELECT) {
163-
// Subquery — for now, skip to matching paren
164186
AstNode* node = make_node(arena_, NodeType::NODE_SUBQUERY);
165187
skip_to_matching_paren();
166-
return node;
188+
return parse_postfix(node);
189+
}
190+
// Empty tuple: ()
191+
if (tok_.peek().type == TokenType::TK_RPAREN) {
192+
tok_.skip();
193+
AstNode* tuple = make_node(arena_, NodeType::NODE_TUPLE);
194+
return parse_postfix(tuple);
167195
}
168196
AstNode* expr = parse();
197+
if (tok_.peek().type == TokenType::TK_COMMA) {
198+
// Tuple: (expr, expr, ...)
199+
AstNode* tuple = make_node(arena_, NodeType::NODE_TUPLE);
200+
if (expr) tuple->add_child(expr);
201+
while (tok_.peek().type == TokenType::TK_COMMA) {
202+
tok_.skip();
203+
AstNode* elem = parse();
204+
if (elem) tuple->add_child(elem);
205+
}
206+
if (tok_.peek().type == TokenType::TK_RPAREN) tok_.skip();
207+
return parse_postfix(tuple);
208+
}
169209
if (tok_.peek().type == TokenType::TK_RPAREN) {
170210
tok_.skip();
171211
}
172-
return expr;
212+
// Check for postfix: (expr).field or (expr)[index]
213+
return parse_postfix(expr);
173214
}
174215
case TokenType::TK_IDENTIFIER: {
175216
tok_.skip();
@@ -394,6 +435,52 @@ class ExpressionParser {
394435
return node;
395436
}
396437

438+
// ARRAY[val, val, ...] constructor
439+
AstNode* parse_array_constructor() {
440+
AstNode* arr = make_node(arena_, NodeType::NODE_ARRAY_CONSTRUCTOR);
441+
if (tok_.peek().type == TokenType::TK_LBRACKET) {
442+
tok_.skip();
443+
if (tok_.peek().type != TokenType::TK_RBRACKET) {
444+
while (true) {
445+
AstNode* elem = parse();
446+
if (elem) arr->add_child(elem);
447+
if (tok_.peek().type == TokenType::TK_COMMA) tok_.skip();
448+
else break;
449+
}
450+
}
451+
if (tok_.peek().type == TokenType::TK_RBRACKET) tok_.skip();
452+
}
453+
return parse_postfix(arr);
454+
}
455+
456+
// Handle postfix operators: .field, [index]
457+
AstNode* parse_postfix(AstNode* expr) {
458+
while (true) {
459+
Token t = tok_.peek();
460+
if (t.type == TokenType::TK_DOT) {
461+
// Field access: (expr).field or (expr).*
462+
tok_.skip();
463+
Token field = tok_.next_token();
464+
AstNode* access = make_node(arena_, NodeType::NODE_FIELD_ACCESS);
465+
access->add_child(expr);
466+
access->add_child(make_node(arena_, NodeType::NODE_IDENTIFIER, field.text));
467+
expr = access;
468+
} else if (t.type == TokenType::TK_LBRACKET) {
469+
// Array subscript: expr[index]
470+
tok_.skip();
471+
AstNode* index = parse();
472+
if (tok_.peek().type == TokenType::TK_RBRACKET) tok_.skip();
473+
AstNode* subscript = make_node(arena_, NodeType::NODE_ARRAY_SUBSCRIPT);
474+
subscript->add_child(expr);
475+
if (index) subscript->add_child(index);
476+
expr = subscript;
477+
} else {
478+
break;
479+
}
480+
}
481+
return expr;
482+
}
483+
397484
// Skip tokens until matching closing paren (handles nesting)
398485
void skip_to_matching_paren() {
399486
int depth = 1;
@@ -473,6 +560,8 @@ class ExpressionParser {
473560
case TokenType::TK_COLUMNS:
474561
case TokenType::TK_FIELDS:
475562
case TokenType::TK_ROWS:
563+
case TokenType::TK_ARRAY:
564+
case TokenType::TK_ROW:
476565
return true;
477566
default:
478567
return false;

include/sql_parser/keywords_mysql.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ inline constexpr KeywordEntry KEYWORDS[] = {
1919
{"ALTER", 5, TokenType::TK_ALTER},
2020
{"ANALYZE", 7, TokenType::TK_ANALYZE},
2121
{"AND", 3, TokenType::TK_AND},
22+
{"ARRAY", 5, TokenType::TK_ARRAY},
2223
{"AS", 2, TokenType::TK_AS},
2324
{"ASC", 3, TokenType::TK_ASC},
2425
{"AVG", 3, TokenType::TK_AVG},
@@ -126,6 +127,7 @@ inline constexpr KeywordEntry KEYWORDS[] = {
126127
{"REVOKE", 6, TokenType::TK_REVOKE},
127128
{"RIGHT", 5, TokenType::TK_RIGHT},
128129
{"ROLLBACK", 8, TokenType::TK_ROLLBACK},
130+
{"ROW", 3, TokenType::TK_ROW},
129131
{"ROWS", 4, TokenType::TK_ROWS},
130132
{"SAVEPOINT", 9, TokenType::TK_SAVEPOINT},
131133
{"SCHEMA", 6, TokenType::TK_SCHEMA},

include/sql_parser/keywords_pgsql.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ inline constexpr KeywordEntry KEYWORDS[] = {
1717
{"ALTER", 5, TokenType::TK_ALTER},
1818
{"ANALYZE", 7, TokenType::TK_ANALYZE},
1919
{"AND", 3, TokenType::TK_AND},
20+
{"ARRAY", 5, TokenType::TK_ARRAY},
2021
{"AS", 2, TokenType::TK_AS},
2122
{"ASC", 3, TokenType::TK_ASC},
2223
{"AVG", 3, TokenType::TK_AVG},
@@ -102,6 +103,7 @@ inline constexpr KeywordEntry KEYWORDS[] = {
102103
{"REVOKE", 6, TokenType::TK_REVOKE},
103104
{"RIGHT", 5, TokenType::TK_RIGHT},
104105
{"ROLLBACK", 8, TokenType::TK_ROLLBACK},
106+
{"ROW", 3, TokenType::TK_ROW},
105107
{"SAVEPOINT", 9, TokenType::TK_SAVEPOINT},
106108
{"SCHEMA", 6, TokenType::TK_SCHEMA},
107109
{"SELECT", 6, TokenType::TK_SELECT},

include/sql_parser/token.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ enum class TokenType : uint16_t {
1515
TK_STRING,
1616
TK_LPAREN,
1717
TK_RPAREN,
18+
TK_LBRACKET, // [
19+
TK_RBRACKET, // ]
1820
TK_COMMA,
1921
TK_SEMICOLON,
2022
TK_DOT,
@@ -105,6 +107,8 @@ enum class TokenType : uint16_t {
105107
TK_COLUMNS,
106108
TK_FIELDS,
107109
TK_ROWS,
110+
TK_ARRAY,
111+
TK_ROW,
108112
};
109113

110114
struct Token {

include/sql_parser/tokenizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,8 @@ class Tokenizer {
340340
switch (c) {
341341
case '(': return make_token(TokenType::TK_LPAREN, s, 1);
342342
case ')': return make_token(TokenType::TK_RPAREN, s, 1);
343+
case '[': return make_token(TokenType::TK_LBRACKET, s, 1);
344+
case ']': return make_token(TokenType::TK_RBRACKET, s, 1);
343345
case ',': return make_token(TokenType::TK_COMMA, s, 1);
344346
case ';': return make_token(TokenType::TK_SEMICOLON, s, 1);
345347
case '.': return make_token(TokenType::TK_DOT, s, 1);

0 commit comments

Comments
 (0)