Skip to content

Commit f021273

Browse files
committed
Add support for DELETE statements
1 parent 8df98a9 commit f021273

4 files changed

Lines changed: 424 additions & 116 deletions

File tree

examples/main_mysql_example.cpp

Lines changed: 90 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,104 @@
1-
#include "mysql_parser/mysql_parser.h" // Changed include path and namespace
1+
#include "mysql_parser/mysql_parser.h" // Ensure this path is correct for your include setup
22
#include <iostream>
33
#include <vector>
44
#include <string>
55

6+
/// Parses a single SQL statement and reports the outcome on stdout.
///
/// Prints a separator and the statement about to be parsed, calls
/// `clearErrors()` on the parser, then parses. When an AST comes back it is
/// dumped with `MysqlParser::print_ast`; otherwise every message returned by
/// `getErrors()` is listed, or a hint is printed when that list is empty.
///
/// @param parser     Parser instance, reused across calls.
/// @param query_type Label shown in the banner line (e.g. "SELECT").
/// @param query      SQL text handed to the parser.
void parse_and_print(MysqlParser::Parser& parser, const std::string& query_type, const std::string& query) {
    std::cout << "------------------------------------------\n"
              << "Parsing MySQL " << query_type << " query: " << query << std::endl;

    parser.clearErrors();
    const std::unique_ptr<MysqlParser::AstNode> root = parser.parse(query);

    // Success path: dump the tree and stop.
    if (root) {
        std::cout << "Parsing successful!" << std::endl;
        MysqlParser::print_ast(root.get());
        return;
    }

    std::cout << "Parsing failed." << std::endl;

    const auto& messages = parser.getErrors();
    if (messages.empty()) {
        // A null AST with no recorded message: point the reader at the parser itself.
        std::cout << " (No specific error messages, check parser logic or mysql_yyerror)" << std::endl;
        return;
    }

    for (const auto& message : messages) {
        std::cout << " Error: " << message << std::endl;
    }
}
28+
629
int main() {
7-
MysqlParser::Parser parser; // Changed namespace
30+
MysqlParser::Parser parser;
831

9-
std::vector<std::string> queries = {
32+
std::vector<std::string> select_queries = {
1033
"SELECT name FROM users;",
11-
"SELECT * FROM `orders`;", // MySQL backticked identifier
34+
"SELECT * FROM `orders`;",
35+
"SELECT * FROM tablenameB" // No semicolon
36+
};
37+
38+
std::vector<std::string> insert_queries = {
1239
"INSERT INTO products VALUES ('a new gadget');",
13-
"INSERT INTO logs VALUES (\"Error message with double quotes\");", // MySQL double quotes
14-
"INSERT INTO `special-table` VALUES ('escaped value \\'single quote\\' and \\\\ backslash');", // MySQL escapes
15-
"QUIT", // MySQL often doesn't require semicolon for last statement in a batch
16-
"SELECT * FROM WHERE;",
17-
"INSERT INTO logs VALUES (no_quotes_here);"
40+
"INSERT INTO logs VALUES (\"Error message with double quotes\")", // No semicolon
41+
"INSERT INTO `special-table` VALUES ('escaped value \\'single quote\\' and \\\\ backslash');"
1842
};
1943

20-
for (const auto& query : queries) {
21-
std::cout << "------------------------------------------\n";
22-
std::cout << "Parsing MySQL query: " << query << std::endl;
23-
24-
parser.clearErrors();
25-
std::unique_ptr<MysqlParser::AstNode> ast = parser.parse(query); // Changed namespace
44+
std::vector<std::string> set_queries = {
45+
"SET @my_user_var = 'hello world';",
46+
"SET @anotherVar = 12345;",
47+
"SET global max_connections = 1000", // No semicolon
48+
"SET @@session.net_write_timeout = 120;",
49+
"SET NAMES 'utf8mb4' COLLATE 'utf8mb4_unicode_ci';",
50+
"SET CHARACTER SET DEFAULT",
51+
"SET @a = 1, @b = 'two', global max_heap_table_size = 128000000;"
52+
};
2653

27-
if (ast) {
28-
std::cout << "Parsing successful!" << std::endl;
29-
MysqlParser::print_ast(ast.get()); // Changed namespace
30-
} else {
31-
std::cout << "Parsing failed." << std::endl;
32-
const auto& errors = parser.getErrors();
33-
if (errors.empty()) {
34-
std::cout << " (No specific error messages, check parser logic or mysql_yyerror)" << std::endl;
35-
} else {
36-
for (const auto& error : errors) {
37-
std::cout << " Error: " << error << std::endl;
38-
}
39-
}
40-
}
54+
std::vector<std::string> delete_queries = {
55+
// Single-table DELETE statements
56+
"DELETE FROM customers WHERE customer_id = 101;",
57+
"DELETE LOW_PRIORITY FROM orders WHERE order_date < '2023-01-01'",
58+
"DELETE QUICK IGNORE FROM logs WHERE log_level = 'DEBUG' ORDER BY timestamp DESC LIMIT 1000;",
59+
"DELETE FROM events WHERE event_name = `expired-event`", // Backticked identifier for value
60+
61+
// Multi-table DELETE statements (simplified, based on current grammar)
62+
"DELETE t1 FROM table1 AS t1, table2 AS t2 WHERE t1.id = t2.ref_id;", // Needs table_reference_list_placeholder to be more robust
63+
"DELETE FROM t1, t2 USING table1 AS t1 INNER JOIN table2 AS t2 ON t1.key = t2.key WHERE t1.value > 100;", // Also simplified
64+
65+
// DELETE without semicolon
66+
"DELETE FROM old_records WHERE last_accessed < '2020-01-01'",
67+
68+
// Potentially problematic or error cases for DELETE
69+
"DELETE quick low_priority from test_table", // Order of options might matter or not be fully supported yet
70+
"DELETE FROM table1 WHERE id = ", // Incomplete WHERE
71+
"DELETE tbl1 tbl2 FROM table_references" // Common MySQL multi-table, current grammar might simplify tbl1, tbl2 part
72+
};
73+
74+
75+
std::cout << "\n======= SELECT QUERIES =======\n";
76+
for (const auto& query : select_queries) {
77+
parse_and_print(parser, "SELECT", query);
78+
}
79+
80+
std::cout << "\n======= INSERT QUERIES =======\n";
81+
for (const auto& query : insert_queries) {
82+
parse_and_print(parser, "INSERT", query);
4183
}
84+
85+
std::cout << "\n======= SET QUERIES =======\n";
86+
for (const auto& query : set_queries) {
87+
parse_and_print(parser, "SET", query);
88+
}
89+
90+
std::cout << "\n======= DELETE QUERIES =======\n";
91+
for (const auto& query : delete_queries) {
92+
parse_and_print(parser, "DELETE", query);
93+
}
94+
95+
// Example of a known failing query (due to function call in expression_placeholder)
96+
std::cout << "\n======= KNOWN FAILING SET QUERY (Function Call) =======\n";
97+
parse_and_print(parser, "SET", "SET @myvar = some_function(1, 'a');");
98+
99+
std::cout << "\n======= KNOWN FAILING SET QUERY (Invalid Identifier) =======\n";
100+
parse_and_print(parser, "SET", "SET global invalid-variable = 100;");
101+
102+
42103
return 0;
43104
}

include/mysql_parser/mysql_ast.h

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,38 @@
44
#include <string>
55
#include <vector>
66
#include <iostream>
7-
#include <algorithm>
7+
#include <algorithm>
88

9-
namespace MysqlParser { // Changed namespace
9+
namespace MysqlParser {
1010

11-
// NodeType enum can be identical for this stage, or diverge later
1211
/// Kinds of node that can appear in the parser's AST.
///
/// Enumerators carry implicit values, so declaration order determines each
/// numeric value (print_ast prints that raw integer for kinds it has no label
/// for). Keep the order stable.
enum class NodeType {
    NODE_UNKNOWN,
    NODE_COMMAND,

    // Statements and basic leaves.
    NODE_SELECT_STATEMENT,
    NODE_INSERT_STATEMENT,
    NODE_DELETE_STATEMENT,
    NODE_IDENTIFIER,
    NODE_STRING_LITERAL,
    NODE_ASTERISK,

    // SET statement and its parts.
    NODE_SET_STATEMENT,
    NODE_VARIABLE_ASSIGNMENT,     // @var = expr or sysvar = expr
    NODE_USER_VARIABLE,           // @varname
    NODE_SYSTEM_VARIABLE,         // sysvar (can have scope)
    NODE_VARIABLE_SCOPE,          // GLOBAL, SESSION, PERSIST, PERSIST_ONLY
    NODE_EXPRESSION_PLACEHOLDER,  // placeholder for complex expressions
    NODE_SET_NAMES,               // SET NAMES
    NODE_SET_CHARSET,             // SET CHARSET

    // Added together with DELETE support; the names suggest the clause each
    // one represents (NOTE(review): confirm exact usage against the grammar).
    NODE_DELETE_OPTIONS,
    NODE_TABLE_NAME_LIST,
    NODE_FROM_CLAUSE,
    NODE_USING_CLAUSE,
    NODE_WHERE_CLAUSE,
    NODE_ORDER_BY_CLAUSE,
    NODE_ORDER_BY_ITEM,
    NODE_LIMIT_CLAUSE,
    NODE_COMPARISON_EXPRESSION,
    NODE_OPERATOR,
    NODE_QUALIFIED_IDENTIFIER
};
2940

3041
struct AstNode {
@@ -63,6 +74,7 @@ inline void print_ast(const AstNode* node, int indent = 0) {
6374
case NodeType::NODE_COMMAND: type_str = "COMMAND"; break;
6475
case NodeType::NODE_SELECT_STATEMENT: type_str = "SELECT_STMT"; break;
6576
case NodeType::NODE_INSERT_STATEMENT: type_str = "INSERT_STMT"; break;
77+
case NodeType::NODE_DELETE_STATEMENT: type_str = "DELETE_STMT"; break;
6678
case NodeType::NODE_IDENTIFIER: type_str = "IDENTIFIER"; break;
6779
case NodeType::NODE_STRING_LITERAL: type_str = "STRING_LITERAL"; break;
6880
case NodeType::NODE_ASTERISK: type_str = "ASTERISK"; break;
@@ -74,6 +86,17 @@ inline void print_ast(const AstNode* node, int indent = 0) {
7486
case NodeType::NODE_EXPRESSION_PLACEHOLDER: type_str = "EXPR_PLACEHOLDER"; break;
7587
case NodeType::NODE_SET_NAMES: type_str = "SET_NAMES"; break;
7688
case NodeType::NODE_SET_CHARSET: type_str = "SET_CHARSET"; break;
89+
case NodeType::NODE_DELETE_OPTIONS: type_str = "DELETE_OPTIONS"; break;
90+
case NodeType::NODE_TABLE_NAME_LIST: type_str = "TABLE_NAME_LIST"; break;
91+
case NodeType::NODE_FROM_CLAUSE: type_str = "FROM_CLAUSE"; break;
92+
case NodeType::NODE_USING_CLAUSE: type_str = "USING_CLAUSE"; break;
93+
case NodeType::NODE_WHERE_CLAUSE: type_str = "WHERE_CLAUSE"; break;
94+
case NodeType::NODE_ORDER_BY_CLAUSE: type_str = "ORDER_BY_CLAUSE"; break;
95+
case NodeType::NODE_ORDER_BY_ITEM: type_str = "ORDER_BY_ITEM"; break;
96+
case NodeType::NODE_LIMIT_CLAUSE: type_str = "LIMIT_CLAUSE"; break;
97+
case NodeType::NODE_COMPARISON_EXPRESSION: type_str = "COMPARISON_EXPR"; break;
98+
case NodeType::NODE_OPERATOR: type_str = "OPERATOR"; break; // <<< ADDED CASE
99+
case NodeType::NODE_QUALIFIED_IDENTIFIER: type_str = "QUALIFIED_IDENTIFIER"; break;
77100
default: type_str = "UNHANDLED_TYPE(" + std::to_string(static_cast<int>(node->type)) + ")"; break;
78101
}
79102
std::cout << "Type: " << type_str << ", Value: '" << node->value << "'" << std::endl;

src/mysql_parser/mysql_lexer.l

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,28 @@
33
%option extra-type="MysqlParser::Parser*"
44

55
%{
6-
#include "mysql_parser/mysql_parser.h"
7-
#include "mysql_parser/mysql_ast.h"
8-
#include "mysql_parser.tab.h" // Will be mysql_parser.tab.h
6+
#include "mysql_parser/mysql_parser.h" // For MysqlParser::Parser, yyscan_t
7+
#include "mysql_parser/mysql_ast.h" // For MysqlParser::AstNode, etc.
8+
#include "mysql_parser.tab.h" // Bison-generated: token enums, defines union MYSQL_YYSTYPE and YYSTYPE
99
#include <string>
1010
#include <vector>
1111

12-
// YY_DECL to control the signature of mysql_yylex
12+
// YY_DECL to control the signature of the generated mysql_yylex function.
13+
// This signature MUST match how Bison calls it (influenced by %lex-param in .y file
14+
// and the extern declaration of mysql_yylex in the .y file's prologue).
15+
// We use the explicit union name as deduced from previous Bison error messages.
16+
// The actual definition of 'union MYSQL_YYSTYPE' comes from "mysql_parser.tab.h".
1317
union MYSQL_YYSTYPE; // Forward declare for YY_DECL (Bison defines this in mysql_parser.tab.h)
1418
#undef YY_DECL
1519
#define YY_DECL int mysql_yylex (union MYSQL_YYSTYPE *yylval_param, yyscan_t yyscanner, MysqlParser::Parser* parser_context)
1620

17-
// #define YY_USER_DATA ((MysqlParser::Parser*)yyget_extra(yyscanner)) // Alternative way to access context
21+
// YY_USER_DATA can be used to access parser_context IF it's passed as yyextra,
22+
// OR if parser_context from YY_DECL is directly used.
23+
// Since parser_context is now a direct parameter to mysql_yylex via YY_DECL,
24+
// we can use it directly in actions.
25+
// #define YY_USER_DATA ((MysqlParser::Parser*)yyget_extra(yyscanner))
1826
%}
1927

20-
/* Declare exclusive start conditions */
2128
%x COMMENT
2229
%x SQSTRING
2330
%x DQSTRING
@@ -27,9 +34,9 @@ union MYSQL_YYSTYPE; // Forward declare for YY_DECL (Bison defines this in mysql
2734

2835
<INITIAL>{
2936
"/*" { BEGIN(COMMENT); }
30-
"-- ".* { /* MySQL -- comment (note space) */ }
31-
"--\n" { /* MySQL -- comment followed by newline */ }
32-
"#".* { /* MySQL # comment */ }
37+
"-- ".* { /* MySQL -- comment (note space); no action, just ignore */ }
38+
"--\n" { /* MySQL -- comment followed by newline; no action */ }
39+
"#".* { /* MySQL # comment; no action */ }
3340

3441
[ \t\n]+ { /* Ignore whitespace */ }
3542

@@ -50,16 +57,30 @@ union MYSQL_YYSTYPE; // Forward declare for YY_DECL (Bison defines this in mysql
5057
"DEFAULT" { return TOKEN_DEFAULT; }
5158
"COLLATE" { return TOKEN_COLLATE; }
5259

60+
"DELETE" { return TOKEN_DELETE; }
61+
"LOW_PRIORITY" { return TOKEN_LOW_PRIORITY; }
62+
"QUICK" { return TOKEN_QUICK; }
63+
"IGNORE" { return TOKEN_IGNORE_SYM; }
64+
"USING" { return TOKEN_USING; }
65+
"ORDER" { return TOKEN_ORDER; }
66+
"BY" { return TOKEN_BY; }
67+
"LIMIT" { return TOKEN_LIMIT; }
68+
"ASC" { return TOKEN_ASC; }
69+
"DESC" { return TOKEN_DESC; }
70+
"WHERE" { return TOKEN_WHERE; }
71+
"AS" { return TOKEN_AS; }
72+
73+
5374
"`" { yylval_param->str_val = new std::string(); BEGIN(BTIDENT); }
5475

5576
"@@global." { return TOKEN_GLOBAL_VAR_PREFIX; }
5677
"@@session." { return TOKEN_SESSION_VAR_PREFIX; }
5778
"@@local." { return TOKEN_SESSION_VAR_PREFIX; } /* Alias for session */
58-
"@@persisted." { /* Placeholder if you add specific handling */ return TOKEN_PERSIST_VAR_PREFIX; }
79+
"@@persisted." { return TOKEN_PERSIST_VAR_PREFIX; } /* If you add specific handling */
5980
"@@" { return TOKEN_DOUBLESPECIAL; }
6081
"@" { return TOKEN_SPECIAL; }
6182

62-
[a-zA-Z_][a-zA-Z0-9_]* { /* Does not include hyphen for now */
83+
[a-zA-Z_][a-zA-Z0-9_]* {
6384
yylval_param->str_val = new std::string(yytext);
6485
return TOKEN_IDENTIFIER;
6586
}
@@ -71,10 +92,18 @@ union MYSQL_YYSTYPE; // Forward declare for YY_DECL (Bison defines this in mysql
7192
"(" { return TOKEN_LPAREN; }
7293
")" { return TOKEN_RPAREN; }
7394
";" { return TOKEN_SEMICOLON; }
74-
"=" { return TOKEN_EQUAL; }
7595
"." { return TOKEN_DOT; }
7696
"," { return TOKEN_COMMA; }
7797

98+
"=" { return TOKEN_EQUAL; }
99+
"<" { return TOKEN_LESS; }
100+
">" { return TOKEN_GREATER; }
101+
"<=" { return TOKEN_LESS_EQUAL; }
102+
">=" { return TOKEN_GREATER_EQUAL; }
103+
"!=" { return TOKEN_NOT_EQUAL; }
104+
"<>" { return TOKEN_NOT_EQUAL; }
105+
106+
78107
[0-9]+("."[0-9]+)? { yylval_param->str_val = new std::string(yytext); return TOKEN_NUMBER_LITERAL;}
79108

80109
. {
@@ -83,7 +112,7 @@ union MYSQL_YYSTYPE; // Forward declare for YY_DECL (Bison defines this in mysql
83112
if (parser_context) {
84113
parser_context->internal_add_error(err_msg);
85114
} else {
86-
fprintf(stderr, "%s\n", err_msg);
115+
fprintf(stderr, "%s\n", err_msg); // Fallback
87116
}
88117
}
89118
}
@@ -99,27 +128,30 @@ union MYSQL_YYSTYPE; // Forward declare for YY_DECL (Bison defines this in mysql
99128
[^'\\]+ { /* Run of ordinary characters, i.e. anything except the single quote and the backslash: append verbatim. The pattern has to be a bare character class; wrapped in double quotes flex matches it as literal text and string content never reaches this rule. */ *(yylval_param->str_val) += yytext; }
100129
"\\'" { *(yylval_param->str_val) += "'"; }
101130
"\\\\" { *(yylval_param->str_val) += "\\"; }
102-
"''" { *(yylval_param->str_val) += "'"; }
131+
"''" { *(yylval_param->str_val) += "'"; } /* MySQL: '' inside string is a literal ' */
103132
"'" { *(yylval_param->str_val) += "'"; BEGIN(INITIAL); return TOKEN_STRING_LITERAL; }
104-
<<EOF>> { if(parser_context) parser_context->internal_add_error("Unterminated single-quoted string"); BEGIN(INITIAL); return YY_NULL; }
133+
<<EOF>> {
    /* Input ended inside a single-quoted string. Record the error and free
       the partially built buffer: this rule returns YY_NULL instead of a
       string token, so nothing downstream receives the pointer. */
    if(parser_context) parser_context->internal_add_error("Unterminated single-quoted string");
    delete yylval_param->str_val;
    yylval_param->str_val = nullptr;
    BEGIN(INITIAL);
    return YY_NULL; /* Return 0 for EOF/error */
}
105134
}
106135

107136
<DQSTRING>{
[^\"\\]+ { /* Run of ordinary characters, i.e. anything except the double quote and the backslash: append verbatim. The pattern has to be a bare character class; wrapped in double quotes flex matches it as literal text and string content never reaches this rule. */ *(yylval_param->str_val) += yytext; }
"\\\"" { /* backslash-escaped double quote */ *(yylval_param->str_val) += "\""; }
"\\\\" { /* escaped backslash */ *(yylval_param->str_val) += "\\"; }
"\"\"" { *(yylval_param->str_val) += "\""; } /* MySQL: "" inside string is a literal " */
"\"" { /* Closing delimiter: it is appended to the token text before the token is returned. NOTE(review): presumably the opening rule stores the opening delimiter as well - confirm. */ *(yylval_param->str_val) += "\""; BEGIN(INITIAL); return TOKEN_STRING_LITERAL; }
<<EOF>> {
    /* Input ended inside a double-quoted string. Record the error and free
       the partially built buffer: this rule returns YY_NULL instead of a
       string token, so nothing downstream receives the pointer. */
    if(parser_context) parser_context->internal_add_error("Unterminated double-quoted string");
    delete yylval_param->str_val;
    yylval_param->str_val = nullptr;
    BEGIN(INITIAL);
    return YY_NULL;
}
}
115144

116145
<BTIDENT>{
"`" { BEGIN(INITIAL); return TOKEN_IDENTIFIER; } /* Closing backtick: returns the accumulated string */
"``" { *(yylval_param->str_val) += '`'; } /* `` inside backticks is a literal ` */
[^`\n]+ { *(yylval_param->str_val) += yytext; }
\n {
    /* A newline ends the identifier with an error. No token is returned, so
       scanning simply resumes in INITIAL; the partial name is freed here
       because the next token that sets str_val would otherwise overwrite
       the only pointer to it. */
    if(parser_context) parser_context->internal_add_error("Newline in backticked identifier");
    delete yylval_param->str_val;
    yylval_param->str_val = nullptr;
    BEGIN(INITIAL);
}
<<EOF>> {
    /* Input ended before the closing backtick: record the error and free
       the partial name, since YY_NULL is returned instead of an identifier
       token. */
    if(parser_context) parser_context->internal_add_error("Unterminated backticked identifier");
    delete yylval_param->str_val;
    yylval_param->str_val = nullptr;
    BEGIN(INITIAL);
    return YY_NULL;
}
}
123152

124153
%%
125154

155+
// yywrap is not strictly needed due to %option noyywrap
156+
// int mysql_yywrap(yyscan_t scanner) { return 1; }
157+

0 commit comments

Comments
 (0)