Skip to content

Commit 387ace2

Browse files
committed
Added support for more queries
* SHOW FULL FIELDS * SHOW DATABASES * COMMIT * BEGIN
1 parent ed0594a commit 387ace2

3 files changed

Lines changed: 125 additions & 20 deletions

File tree

include/mysql_parser/mysql_ast.h

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,17 @@ enum class NodeType {
8484
NODE_LINES_TERMINATED_BY,
8585
NODE_CHARSET_OPTION, // For CHARACTER SET 'name' in OUTFILE
8686

87-
NODE_KEYWORD // For storing keywords like ALL, DISTINCT (as value) in some contexts
87+
NODE_KEYWORD, // For storing keywords like ALL, DISTINCT (as value) in some contexts
88+
89+
// Added for SHOW, BEGIN, COMMIT
90+
NODE_SHOW_STATEMENT,
91+
NODE_BEGIN_STATEMENT,
92+
NODE_COMMIT_STATEMENT,
93+
NODE_SHOW_OPTION_FULL, // For SHOW FULL ...
94+
NODE_SHOW_OPTION_FIELDS, // For SHOW ... FIELDS
95+
NODE_SHOW_TARGET_DATABASES, // For SHOW DATABASES
96+
NODE_TABLE_SPECIFICATION // For FROM table_name in SHOW FIELDS
97+
8898
};
8999

90100
// Structure for an AST Node
@@ -194,6 +204,13 @@ inline void print_ast(const AstNode* node, int indent = 0) {
194204
case NodeType::NODE_LINES_TERMINATED_BY: type_str = "LINES_TERMINATED_BY"; break;
195205
case NodeType::NODE_CHARSET_OPTION: type_str = "CHARSET_OPTION"; break;
196206
case NodeType::NODE_KEYWORD: type_str = "KEYWORD"; break;
207+
case NodeType::NODE_SHOW_STATEMENT: type_str = "SHOW_STMT"; break;
208+
case NodeType::NODE_BEGIN_STATEMENT: type_str = "BEGIN_STMT"; break;
209+
case NodeType::NODE_COMMIT_STATEMENT: type_str = "COMMIT_STMT"; break;
210+
case NodeType::NODE_SHOW_OPTION_FULL: type_str = "SHOW_OPT_FULL"; break;
211+
case NodeType::NODE_SHOW_OPTION_FIELDS: type_str = "SHOW_OPT_FIELDS"; break;
212+
case NodeType::NODE_SHOW_TARGET_DATABASES: type_str = "SHOW_TARGET_DB"; break;
213+
case NodeType::NODE_TABLE_SPECIFICATION: type_str = "TABLE_SPEC"; break;
197214
default: type_str = "UNHANDLED_TYPE(" + std::to_string(static_cast<int>(node->type)) + ")"; break;
198215
}
199216
std::cout << "Type: " << type_str;

src/mysql_parser/mysql_lexer.l

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ union MYSQL_YYSTYPE;
5252
"DEFAULT" { return TOKEN_DEFAULT; }
5353
"COLLATE" { return TOKEN_COLLATE; }
5454

55+
"SHOW" { return TOKEN_SHOW; }
56+
"FULL" { return TOKEN_FULL; } /* TOKEN_FULL is shared with JOIN. NOTE(review): if an earlier "FULL" rule already exists in this lexer, flex will never match this one -- confirm and drop the duplicate. */
57+
"FIELDS" { return TOKEN_FIELDS; } /* TOKEN_FIELDS is shared with INTO OUTFILE. NOTE(review): same duplicate-rule concern as "FULL" -- confirm. */
58+
"DATABASES" { return TOKEN_DATABASES; }
59+
"BEGIN" { return TOKEN_BEGIN; }
60+
"COMMIT" { return TOKEN_COMMIT; }
61+
5562
"DELETE" { return TOKEN_DELETE; }
5663
"LOW_PRIORITY" { return TOKEN_LOW_PRIORITY; }
5764
"QUICK" { return TOKEN_QUICK; }

src/mysql_parser/mysql_parser.y

Lines changed: 100 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
union MYSQL_YYSTYPE;
1313
int mysql_yylex(union MYSQL_YYSTYPE* yylval_param, yyscan_t yyscanner, MysqlParser::Parser* parser_context);
14-
1514
%}
1615

1716
%define api.prefix {mysql_yy}
@@ -60,6 +59,11 @@ int mysql_yylex(union MYSQL_YYSTYPE* yylval_param, yyscan_t yyscanner, MysqlPars
6059
%token TOKEN_MATCH TOKEN_AGAINST TOKEN_BOOLEAN TOKEN_MODE
6160

6261
%token TOKEN_IN // For IN BOOLEAN MODE, and potentially IN operator later
62+
%token TOKEN_SHOW TOKEN_DATABASES /* Added for SHOW DATABASES */
63+
/* TOKEN_FIELDS is already declared */
64+
/* TOKEN_FULL is already declared */
65+
%token TOKEN_BEGIN TOKEN_COMMIT /* Added for BEGIN/COMMIT */
66+
6367

6468
%token <str_val> TOKEN_QUIT
6569
%token <str_val> TOKEN_IDENTIFIER
@@ -68,7 +72,7 @@ int mysql_yylex(union MYSQL_YYSTYPE* yylval_param, yyscan_t yyscanner, MysqlPars
6872

6973
// Types
7074
%type <node_val> statement simple_statement command_statement select_statement insert_statement delete_statement
71-
%type <node_val> identifier_node string_literal_node number_literal_node value_for_insert optional_semicolon
75+
%type <node_val> identifier_node string_literal_node number_literal_node value_for_insert optional_semicolon show_statement begin_statement commit_statement
7276
%type <node_val> set_statement set_option_list set_option set_transaction_statement transaction_characteristic_list transaction_characteristic isolation_level_spec
7377
%type <node_val> variable_to_set user_variable system_variable_unqualified system_variable_qualified
7478
%type <node_val> variable_scope
@@ -79,7 +83,7 @@ int mysql_yylex(union MYSQL_YYSTYPE* yylval_param, yyscan_t yyscanner, MysqlPars
7983
%type <node_val> opt_delete_options delete_option delete_option_item_list
8084
%type <node_val> opt_where_clause opt_having_clause
8185
%type <node_val> opt_order_by_clause opt_limit_clause
82-
%type <node_val> order_by_list order_by_item opt_asc_desc
86+
%type <node_val> order_by_list order_by_item opt_asc_desc table_specification
8387
%type <node_val> table_name_list_for_delete
8488
%type <node_val> comparison_operator
8589
%type <node_val> qualified_identifier_node table_name_spec // Added table_name_spec
@@ -97,6 +101,9 @@ int mysql_yylex(union MYSQL_YYSTYPE* yylval_param, yyscan_t yyscanner, MysqlPars
97101
%type <node_val> opt_into_outfile_options_list opt_into_outfile_options_list_tail into_outfile_options_list into_outfile_option
98102
%type <node_val> fields_options_clause lines_options_clause field_option_outfile_list field_option_outfile line_option_outfile_list line_option_outfile
99103
%type <node_val> opt_locking_clause_list locking_clause_list locking_clause lock_strength opt_lock_table_list opt_lock_option
104+
%type <node_val> show_what show_full_modifier show_from_or_in
105+
106+
100107
%type <node_val> subquery derived_table
101108
102109
%type <node_val> single_input_statement // Type for the start symbol
@@ -158,7 +165,7 @@ query_list:
158165
| query_list statement { // This structure is for parsing multiple statements from one yyparse call.
159166
// If parser.parse() is meant to handle one statement string at a time,
160167
// this rule is not suitable as the main start symbol.
161-
}
168+
}
162169
;
163170
*/
164171

@@ -168,6 +175,9 @@ statement:
168175
| insert_statement { $$ = $1; if (parser_context) parser_context->internal_set_ast($1); }
169176
| set_statement { $$ = $1; if (parser_context) parser_context->internal_set_ast($1); }
170177
| delete_statement { $$ = $1; if (parser_context) parser_context->internal_set_ast($1); }
178+
| show_statement { $$ = $1; if (parser_context) parser_context->internal_set_ast($1); }
179+
| begin_statement { $$ = $1; if (parser_context) parser_context->internal_set_ast($1); }
180+
| commit_statement { $$ = $1; if (parser_context) parser_context->internal_set_ast($1); }
171181
;
172182

173183
simple_statement:
@@ -190,12 +200,14 @@ identifier_node:
190200
TOKEN_IDENTIFIER {
191201
std::string val = std::move(*$1);
192202
delete $1;
203+
// Unquoting logic for backticked identifiers
193204
if (val.length() >= 2 && val.front() == '`' && val.back() == '`') {
194205
val = val.substr(1, val.length() - 2);
206+
// Replace `` with `
195207
size_t pos = 0;
196208
while ((pos = val.find("``", pos)) != std::string::npos) {
197209
val.replace(pos, 2, "`");
198-
pos += 1;
210+
pos += 1; // Move past the replaced `
199211
}
200212
}
201213
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_IDENTIFIER, std::move(val));
@@ -207,25 +219,29 @@ qualified_identifier_node: // For table.column or schema.table
207219
std::string qualified_name = $1->value + "." + $3->value;
208220
// Create a generic node; specific handling might be needed based on context
209221
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_QUALIFIED_IDENTIFIER, std::move(qualified_name));
210-
$$->addChild($1);
211-
$$->addChild($3);
222+
$$->addChild($1); // table/schema
223+
$$->addChild($3); // column/table
212224
}
213225
// Potentially add schema.table.column or db.schema.table if needed, though usually context implies this
214226
;
215227

216-
217228
string_literal_node:
218229
TOKEN_STRING_LITERAL {
219230
std::string raw_val = std::move(*$1);
220231
delete $1;
221232
std::string val_content;
222233
char quote_char = 0;
223234
if (!raw_val.empty()) quote_char = raw_val.front();
235+
224236
if (raw_val.length() >= 2 && (raw_val.front() == '\'' || raw_val.front() == '"') && raw_val.front() == raw_val.back()) {
225237
val_content = raw_val.substr(1, raw_val.length() - 2);
226238
} else {
227-
val_content = raw_val; // Should not happen if lexer enforces quotes
239+
// This case might occur if the lexer returns unquoted strings for some reason,
240+
// or for types like hex literals X'...' that might not be strictly string literals
241+
// but are passed as TOKEN_STRING_LITERAL.
242+
val_content = raw_val;
228243
}
244+
229245
std::string unescaped_val;
230246
unescaped_val.reserve(val_content.length());
231247
bool escaping = false;
@@ -236,25 +252,39 @@ string_literal_node:
236252
case 't': unescaped_val += '\t'; break;
237253
case 'r': unescaped_val += '\r'; break;
238254
case 'b': unescaped_val += '\b'; break;
239-
case '0': unescaped_val += '\0'; break;
240-
case 'Z': unescaped_val += '\x1A'; break; // Ctrl+Z
255+
case '0': unescaped_val += '\0'; break; // Null character
256+
case 'Z': unescaped_val += '\x1A'; break; // Ctrl+Z for SUB
241257
case '\\': unescaped_val += '\\'; break;
242258
case '\'': unescaped_val += '\''; break;
243259
case '"': unescaped_val += '"'; break;
244-
default: unescaped_val += val_content[i]; break; // Other escaped chars
260+
// MySQL also allows escaping % and _ for LIKE contexts. NOTE(review): MySQL documents that \% and \_ keep their backslash outside pattern matching, but the default branch here drops it, so LIKE escapes may be lost -- confirm.
261+
default:
262+
// For an unrecognized escape, MySQL ignores the backslash and keeps the following character (e.g. "\x" is just "x").
263+
// That is what this branch does: it appends only the escaped character.
264+
// The alternative would be to keep the backslash as a literal character as well,
265+
// i.e. emit a backslash followed by the character.
266+
// A more robust parser might differentiate or follow the strict SQL standard for unknown escapes.
267+
// For now, the character following the backslash is taken literally when the escape is not recognized.
268+
unescaped_val += val_content[i]; // Or just `unescaped_val += '\\'; unescaped_val += val_content[i];` if \ is always literal
269+
break;
245270
}
246271
escaping = false;
247272
} else if (val_content[i] == '\\') {
273+
// Check if it's a MySQL specific escape sequence that the lexer didn't handle
274+
// (e.g., if lexer is very basic and passes \ through).
275+
// Standard SQL string literals use '' for ' and "" for ".
276+
// MySQL also uses \', \", \\.
248277
escaping = true;
249-
} else if (quote_char != 0 && val_content[i] == quote_char && (i + 1 < val_content.length() && val_content[i+1] == quote_char) ) { // Handle '' or "" for literal quote
278+
} else if (quote_char != 0 && val_content[i] == quote_char && (i + 1 < val_content.length() && val_content[i+1] == quote_char) ) { // Handle '' or "" for literal quote (SQL Standard)
250279
unescaped_val += quote_char;
251-
i++;
280+
i++; // Skip the second quote
252281
}
253282
else {
254283
unescaped_val += val_content[i];
255284
}
256285
}
257-
if(escaping) unescaped_val+='\\'; // Trailing escape char
286+
if(escaping) unescaped_val+='\\'; // if string ends with a single backslash
287+
258288
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_STRING_LITERAL, std::move(unescaped_val));
259289
}
260290
;
@@ -644,7 +674,6 @@ joined_table:
644674
natural_join_type_str += " " + $2->children[1]->value; // "NATURAL LEFT"
645675
}
646676
join_node->value = natural_join_type_str + " JOIN";
647-
648677
join_node->addChild($1); // Left table
649678
// join_node->addChild($2); // The natural spec node itself - or just use its info for value
650679
join_node->addChild($4); // Right table
@@ -926,7 +955,7 @@ set_statement:
926955
MysqlParser::AstNode* set_vars_stmt = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_SET_STATEMENT, "SET_VARIABLES");
927956
set_vars_stmt->addChild($2);
928957
$$ = set_vars_stmt;
929-
}
958+
}
930959
| TOKEN_SET set_transaction_statement optional_semicolon { $$ = $2; }
931960
;
932961

@@ -1118,6 +1147,59 @@ grouping_element:
11181147
// Add WITH ROLLUP if needed
11191148
;
11201149

1150+
/* --- SHOW Statement Rules --- */
1151+
// Parses `SHOW [FULL] <show_what> [;]` and builds a NODE_SHOW_STATEMENT node.
// Children, in order: the FULL modifier node (only when FULL was written), then the show_what node.
// NOTE(review): FULL is accepted in front of every show_what alternative, including DATABASES;
// confirm whether `SHOW FULL DATABASES` should be rejected.
show_statement:
1152+
TOKEN_SHOW show_full_modifier show_what optional_semicolon {
1153+
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_SHOW_STATEMENT);
1154+
if ($2) $$->addChild($2); // show_full_modifier yields nullptr when FULL is absent, so guard the add
1155+
$$->addChild($3); // show_what
1156+
}
1157+
;
1158+
1159+
// Optional FULL keyword of a SHOW statement.
// Yields nullptr when the keyword is absent, otherwise a NODE_SHOW_OPTION_FULL node with value "FULL".
show_full_modifier:
1160+
/* empty */ { $$ = nullptr; }
1161+
| TOKEN_FULL { $$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_SHOW_OPTION_FULL, "FULL"); }
1162+
;
1163+
1164+
// Target of a SHOW statement. Two forms are supported:
//   DATABASES               -> NODE_SHOW_TARGET_DATABASES node with value "DATABASES"
//   FIELDS {FROM|IN} table  -> NODE_SHOW_OPTION_FIELDS node with value "FIELDS" and one child (the table_specification)
show_what:
1165+
TOKEN_DATABASES {
1166+
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_SHOW_TARGET_DATABASES, "DATABASES");
1167+
}
1168+
| TOKEN_FIELDS show_from_or_in table_specification {
1169+
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_SHOW_OPTION_FIELDS, "FIELDS");
1170+
// $2 (show_from_or_in) is always nullptr: FROM/IN is pure syntax, so nothing is added for it.
1171+
$$->addChild($3); // table_specification
1172+
}
1173+
// Add other SHOW variants as needed, e.g., SHOW TABLES, SHOW STATUS, etc.
1174+
// Example: SHOW TABLES [FROM db_name] [LIKE 'pattern' | WHERE expr]
1175+
// SHOW CREATE TABLE table_name
1176+
;
1177+
1178+
// Accepts either FROM or IN between FIELDS and the table name.
// Both alternatives yield nullptr, so the AST does not record which keyword was used.
show_from_or_in:
1179+
TOKEN_FROM { $$ = nullptr; /* keyword only, not stored as node */ }
1180+
| TOKEN_IN { $$ = nullptr; /* keyword only, not stored as node */ }
1181+
;
1182+
1183+
// Wraps a table_name_spec in a NODE_TABLE_SPECIFICATION node (no value, one child).
table_specification: // Used by SHOW FIELDS FROM table_name
1184+
table_name_spec { // Re-use table_name_spec which handles identifier_node and qualified_identifier_node
1185+
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_TABLE_SPECIFICATION);
1186+
$$->addChild($1); // table_name_spec node which contains table_name or schema.table_name
1187+
}
1188+
;
1189+
1190+
1191+
/* --- BEGIN/COMMIT Statement Rules --- */
1192+
// Parses `BEGIN [;]` into a childless NODE_BEGIN_STATEMENT node with value "BEGIN".
begin_statement:
1193+
TOKEN_BEGIN optional_semicolon {
1194+
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_BEGIN_STATEMENT, "BEGIN");
1195+
}
1196+
;
1197+
// Parses `COMMIT [;]` into a childless NODE_COMMIT_STATEMENT node with value "COMMIT".
commit_statement:
1198+
TOKEN_COMMIT optional_semicolon {
1199+
$$ = new MysqlParser::AstNode(MysqlParser::NodeType::NODE_COMMIT_STATEMENT, "COMMIT");
1200+
}
1201+
;
1202+
11211203
/* --- Expression related rules --- */
11221204
expression_placeholder:
11231205
simple_expression { $$ = $1; }
@@ -1229,7 +1311,7 @@ function_call_placeholder:
12291311
} else {
12301312
// Add an empty list node for functions with no arguments, e.g., NOW()
12311313
// This ensures the function call node always has a child for arguments, even if empty.
1232-
$$->addChild(new MysqlParser::AstNode(MysqlParser::NodeType::NODE_EXPRESSION_PLACEHOLDER, "empty_arg_list_wrapper"));
1314+
$$->addChild(new MysqlParser::AstNode(MysqlParser::NodeType::NODE_EXPRESSION_PLACEHOLDER, "empty_arg_list_wrapper"));
12331315
}
12341316
}
12351317
;
@@ -1251,4 +1333,3 @@ opt_expression_placeholder_list:
12511333
// }
12521334
// The default yyerror or the one provided by %define parse.error verbose should be sufficient.
12531335
// If you need custom error formatting or location tracking, you'd define mysql_yyerror here.
1254-

0 commit comments

Comments
 (0)