Skip to content

Commit b51164e

Browse files
committed
feat: add token types and keyword lookup tables for MySQL and PostgreSQL
1 parent 5c58e9f commit b51164e

3 files changed

Lines changed: 357 additions & 0 deletions

File tree

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
#ifndef SQL_PARSER_KEYWORDS_MYSQL_H
2+
#define SQL_PARSER_KEYWORDS_MYSQL_H
3+
4+
#include "sql_parser/token.h"
5+
#include <algorithm>
6+
#include <cstring>
7+
8+
namespace sql_parser {
9+
namespace mysql_keywords {
10+
11+
struct KeywordEntry {
12+
const char* text;
13+
uint8_t len;
14+
TokenType token;
15+
};
16+
17+
/// MySQL keyword table searched by lookup().
///
/// lookup() binary-searches this array using sql_parser::ci_cmp, so the
/// entries must stay sorted in the order that comparison defines. The entries
/// below are in ascending ASCII order of their upper-case spelling; insert new
/// keywords at the matching position and keep `len` equal to the literal's
/// length.
inline constexpr KeywordEntry KEYWORDS[] = {
    {"ALL", 3, TokenType::TK_ALL},
    {"ALTER", 5, TokenType::TK_ALTER},
    {"AND", 3, TokenType::TK_AND},
    {"AS", 2, TokenType::TK_AS},
    {"ASC", 3, TokenType::TK_ASC},
    {"AVG", 3, TokenType::TK_AVG},
    {"BEGIN", 5, TokenType::TK_BEGIN},
    {"BETWEEN", 7, TokenType::TK_BETWEEN},
    {"BY", 2, TokenType::TK_BY},
    {"CASE", 4, TokenType::TK_CASE},
    {"CHARACTER", 9, TokenType::TK_CHARACTER},
    {"CHARSET", 7, TokenType::TK_CHARSET},
    {"COLLATE", 7, TokenType::TK_COLLATE},
    {"COMMIT", 6, TokenType::TK_COMMIT},
    {"COMMITTED", 9, TokenType::TK_COMMITTED},
    {"COUNT", 5, TokenType::TK_COUNT},
    {"CREATE", 6, TokenType::TK_CREATE},
    {"CROSS", 5, TokenType::TK_CROSS},
    {"DATA", 4, TokenType::TK_DATA},
    {"DATABASE", 8, TokenType::TK_DATABASE},
    {"DEALLOCATE", 10, TokenType::TK_DEALLOCATE},
    {"DEFAULT", 7, TokenType::TK_DEFAULT},
    {"DELETE", 6, TokenType::TK_DELETE},
    {"DESC", 4, TokenType::TK_DESC},
    {"DISTINCT", 8, TokenType::TK_DISTINCT},
    {"DROP", 4, TokenType::TK_DROP},
    {"DUMPFILE", 8, TokenType::TK_DUMPFILE},
    {"ELSE", 4, TokenType::TK_ELSE},
    {"END", 3, TokenType::TK_END},
    {"EXECUTE", 7, TokenType::TK_EXECUTE},
    {"EXISTS", 6, TokenType::TK_EXISTS},
    {"FALSE", 5, TokenType::TK_FALSE},
    {"FETCH", 5, TokenType::TK_FETCH},
    {"FOR", 3, TokenType::TK_FOR},
    {"FROM", 4, TokenType::TK_FROM},
    {"FULL", 4, TokenType::TK_FULL},
    {"GLOBAL", 6, TokenType::TK_GLOBAL},
    {"GRANT", 5, TokenType::TK_GRANT},
    {"GROUP", 5, TokenType::TK_GROUP},
    {"HAVING", 6, TokenType::TK_HAVING},
    {"IF", 2, TokenType::TK_IF},
    {"IGNORE", 6, TokenType::TK_IGNORE},
    {"IN", 2, TokenType::TK_IN},
    {"INDEX", 5, TokenType::TK_INDEX},
    {"INNER", 5, TokenType::TK_INNER},
    {"INSERT", 6, TokenType::TK_INSERT},
    {"INTO", 4, TokenType::TK_INTO},
    {"IS", 2, TokenType::TK_IS},
    {"ISOLATION", 9, TokenType::TK_ISOLATION},
    {"JOIN", 4, TokenType::TK_JOIN},
    {"LEFT", 4, TokenType::TK_LEFT},
    {"LEVEL", 5, TokenType::TK_LEVEL},
    {"LIKE", 4, TokenType::TK_LIKE},
    {"LIMIT", 5, TokenType::TK_LIMIT},
    {"LOAD", 4, TokenType::TK_LOAD},
    {"LOCAL", 5, TokenType::TK_LOCAL},
    {"LOCK", 4, TokenType::TK_LOCK},
    {"LOCKED", 6, TokenType::TK_LOCKED},
    {"LOW_PRIORITY", 12, TokenType::TK_LOW_PRIORITY},
    {"MAX", 3, TokenType::TK_MAX},
    {"MIN", 3, TokenType::TK_MIN},
    {"NAMES", 5, TokenType::TK_NAMES},
    {"NATURAL", 7, TokenType::TK_NATURAL},
    {"NOT", 3, TokenType::TK_NOT},
    {"NOWAIT", 6, TokenType::TK_NOWAIT},
    {"NULL", 4, TokenType::TK_NULL},
    {"OFFSET", 6, TokenType::TK_OFFSET},
    {"ON", 2, TokenType::TK_ON},
    {"ONLY", 4, TokenType::TK_ONLY},
    {"OR", 2, TokenType::TK_OR},
    {"ORDER", 5, TokenType::TK_ORDER},
    {"OUTER", 5, TokenType::TK_OUTER},
    {"OUTFILE", 7, TokenType::TK_OUTFILE},
    {"PERSIST", 7, TokenType::TK_PERSIST},
    {"PREPARE", 7, TokenType::TK_PREPARE},
    {"QUICK", 5, TokenType::TK_QUICK},
    {"READ", 4, TokenType::TK_READ},
    {"REPEATABLE", 10, TokenType::TK_REPEATABLE},
    {"REPLACE", 7, TokenType::TK_REPLACE},
    {"RESET", 5, TokenType::TK_RESET},
    {"REVOKE", 6, TokenType::TK_REVOKE},
    {"RIGHT", 5, TokenType::TK_RIGHT},
    {"ROLLBACK", 8, TokenType::TK_ROLLBACK},
    {"SAVEPOINT", 9, TokenType::TK_SAVEPOINT},
    {"SCHEMA", 6, TokenType::TK_SCHEMA},
    {"SELECT", 6, TokenType::TK_SELECT},
    {"SERIALIZABLE", 12, TokenType::TK_SERIALIZABLE},
    {"SESSION", 7, TokenType::TK_SESSION},
    {"SET", 3, TokenType::TK_SET},
    {"SHARE", 5, TokenType::TK_SHARE},
    {"SHOW", 4, TokenType::TK_SHOW},
    {"SKIP", 4, TokenType::TK_SKIP},
    {"SQL_CALC_FOUND_ROWS", 19, TokenType::TK_SQL_CALC_FOUND_ROWS},
    {"START", 5, TokenType::TK_START},
    {"SUM", 3, TokenType::TK_SUM},
    {"TABLE", 5, TokenType::TK_TABLE},
    {"THEN", 4, TokenType::TK_THEN},
    {"TO", 2, TokenType::TK_TO},
    {"TRANSACTION", 11, TokenType::TK_TRANSACTION},
    {"TRUE", 4, TokenType::TK_TRUE},
    {"TRUNCATE", 8, TokenType::TK_TRUNCATE},
    {"UNCOMMITTED", 11, TokenType::TK_UNCOMMITTED},
    {"UNLOCK", 6, TokenType::TK_UNLOCK},
    {"UPDATE", 6, TokenType::TK_UPDATE},
    {"USE", 3, TokenType::TK_USE},
    {"USING", 5, TokenType::TK_USING},
    {"VALUES", 6, TokenType::TK_VALUES},
    {"VIEW", 4, TokenType::TK_VIEW},
    {"WHEN", 4, TokenType::TK_WHEN},
    {"WHERE", 5, TokenType::TK_WHERE},
    {"WRITE", 5, TokenType::TK_WRITE},
};
130+
131+
/// Number of entries in KEYWORDS, derived from the array's size so it cannot
/// drift when rows are added or removed.
inline constexpr size_t KEYWORD_COUNT = sizeof KEYWORDS / sizeof *KEYWORDS;
132+
133+
/// Classify a lexeme against the MySQL keyword table.
///
/// Binary-searches KEYWORDS, ordering the lexeme against each candidate with
/// sql_parser::ci_cmp (declared elsewhere; presumably a case-insensitive
/// three-way compare -- confirm against its definition).
///
/// @param text  Pointer to the lexeme's characters; forwarded to ci_cmp.
/// @param len   Length of the lexeme; forwarded to ci_cmp.
/// @return The token of the entry for which ci_cmp returns 0, or
///         TokenType::TK_IDENTIFIER when no entry compares equal.
inline TokenType lookup(const char* text, uint32_t len) {
    // Half-open search window [first, last) over the sorted table.
    size_t first = 0;
    size_t last = KEYWORD_COUNT;
    while (first < last) {
        // Midpoint computed from the window width so first + last is never formed.
        const size_t probe = first + (last - first) / 2;
        const KeywordEntry& candidate = KEYWORDS[probe];
        const int order = sql_parser::ci_cmp(text, len, candidate.text, candidate.len);
        if (order == 0) {
            return candidate.token;
        }
        if (order < 0) {
            last = probe;       // lexeme sorts before the candidate
        } else {
            first = probe + 1;  // lexeme sorts after the candidate
        }
    }
    return TokenType::TK_IDENTIFIER;
}
144+
145+
} // namespace mysql_keywords
146+
} // namespace sql_parser
147+
148+
#endif // SQL_PARSER_KEYWORDS_MYSQL_H
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#ifndef SQL_PARSER_KEYWORDS_PGSQL_H
2+
#define SQL_PARSER_KEYWORDS_PGSQL_H
3+
4+
#include "sql_parser/token.h"
5+
6+
namespace sql_parser {
7+
namespace pgsql_keywords {
8+
9+
struct KeywordEntry {
10+
const char* text;
11+
uint8_t len;
12+
TokenType token;
13+
};
14+
15+
/// PostgreSQL keyword table searched by lookup().
///
/// lookup() binary-searches this array using sql_parser::ci_cmp, so the
/// entries must stay sorted in the order that comparison defines. The entries
/// below are in ascending ASCII order of their upper-case spelling; insert new
/// keywords at the matching position and keep `len` equal to the literal's
/// length.
inline constexpr KeywordEntry KEYWORDS[] = {
    {"ALL", 3, TokenType::TK_ALL},
    {"ALTER", 5, TokenType::TK_ALTER},
    {"AND", 3, TokenType::TK_AND},
    {"AS", 2, TokenType::TK_AS},
    {"ASC", 3, TokenType::TK_ASC},
    {"AVG", 3, TokenType::TK_AVG},
    {"BEGIN", 5, TokenType::TK_BEGIN},
    {"BETWEEN", 7, TokenType::TK_BETWEEN},
    {"BY", 2, TokenType::TK_BY},
    {"CASE", 4, TokenType::TK_CASE},
    {"CHARACTER", 9, TokenType::TK_CHARACTER},
    {"COLLATE", 7, TokenType::TK_COLLATE},
    {"COMMIT", 6, TokenType::TK_COMMIT},
    {"COMMITTED", 9, TokenType::TK_COMMITTED},
    {"COUNT", 5, TokenType::TK_COUNT},
    {"CREATE", 6, TokenType::TK_CREATE},
    {"CROSS", 5, TokenType::TK_CROSS},
    {"DATA", 4, TokenType::TK_DATA},
    {"DATABASE", 8, TokenType::TK_DATABASE},
    {"DEALLOCATE", 10, TokenType::TK_DEALLOCATE},
    {"DEFAULT", 7, TokenType::TK_DEFAULT},
    {"DELETE", 6, TokenType::TK_DELETE},
    {"DESC", 4, TokenType::TK_DESC},
    {"DISTINCT", 8, TokenType::TK_DISTINCT},
    {"DROP", 4, TokenType::TK_DROP},
    {"ELSE", 4, TokenType::TK_ELSE},
    {"END", 3, TokenType::TK_END},
    {"EXECUTE", 7, TokenType::TK_EXECUTE},
    {"EXISTS", 6, TokenType::TK_EXISTS},
    {"FALSE", 5, TokenType::TK_FALSE},
    {"FETCH", 5, TokenType::TK_FETCH},
    {"FOR", 3, TokenType::TK_FOR},
    {"FROM", 4, TokenType::TK_FROM},
    {"FULL", 4, TokenType::TK_FULL},
    {"GRANT", 5, TokenType::TK_GRANT},
    {"GROUP", 5, TokenType::TK_GROUP},
    {"HAVING", 6, TokenType::TK_HAVING},
    {"IF", 2, TokenType::TK_IF},
    {"IN", 2, TokenType::TK_IN},
    {"INDEX", 5, TokenType::TK_INDEX},
    {"INNER", 5, TokenType::TK_INNER},
    {"INSERT", 6, TokenType::TK_INSERT},
    {"INTO", 4, TokenType::TK_INTO},
    {"IS", 2, TokenType::TK_IS},
    {"ISOLATION", 9, TokenType::TK_ISOLATION},
    {"JOIN", 4, TokenType::TK_JOIN},
    {"LEFT", 4, TokenType::TK_LEFT},
    {"LEVEL", 5, TokenType::TK_LEVEL},
    {"LIKE", 4, TokenType::TK_LIKE},
    {"LIMIT", 5, TokenType::TK_LIMIT},
    {"LOAD", 4, TokenType::TK_LOAD},
    {"LOCAL", 5, TokenType::TK_LOCAL},
    {"LOCK", 4, TokenType::TK_LOCK},
    {"MAX", 3, TokenType::TK_MAX},
    {"MIN", 3, TokenType::TK_MIN},
    {"NAMES", 5, TokenType::TK_NAMES},
    {"NATURAL", 7, TokenType::TK_NATURAL},
    {"NOT", 3, TokenType::TK_NOT},
    {"NULL", 4, TokenType::TK_NULL},
    {"OFFSET", 6, TokenType::TK_OFFSET},
    {"ON", 2, TokenType::TK_ON},
    {"ONLY", 4, TokenType::TK_ONLY},
    {"OR", 2, TokenType::TK_OR},
    {"ORDER", 5, TokenType::TK_ORDER},
    {"OUTER", 5, TokenType::TK_OUTER},
    {"PREPARE", 7, TokenType::TK_PREPARE},
    {"READ", 4, TokenType::TK_READ},
    {"REPEATABLE", 10, TokenType::TK_REPEATABLE},
    {"RESET", 5, TokenType::TK_RESET},
    {"REVOKE", 6, TokenType::TK_REVOKE},
    {"RIGHT", 5, TokenType::TK_RIGHT},
    {"ROLLBACK", 8, TokenType::TK_ROLLBACK},
    {"SAVEPOINT", 9, TokenType::TK_SAVEPOINT},
    {"SCHEMA", 6, TokenType::TK_SCHEMA},
    {"SELECT", 6, TokenType::TK_SELECT},
    {"SERIALIZABLE", 12, TokenType::TK_SERIALIZABLE},
    {"SESSION", 7, TokenType::TK_SESSION},
    {"SET", 3, TokenType::TK_SET},
    {"SHARE", 5, TokenType::TK_SHARE},
    {"SHOW", 4, TokenType::TK_SHOW},
    {"START", 5, TokenType::TK_START},
    {"SUM", 3, TokenType::TK_SUM},
    {"TABLE", 5, TokenType::TK_TABLE},
    {"THEN", 4, TokenType::TK_THEN},
    {"TO", 2, TokenType::TK_TO},
    {"TRANSACTION", 11, TokenType::TK_TRANSACTION},
    {"TRUE", 4, TokenType::TK_TRUE},
    {"TRUNCATE", 8, TokenType::TK_TRUNCATE},
    {"UNCOMMITTED", 11, TokenType::TK_UNCOMMITTED},
    {"UNLOCK", 6, TokenType::TK_UNLOCK},
    {"UPDATE", 6, TokenType::TK_UPDATE},
    {"USE", 3, TokenType::TK_USE},
    {"USING", 5, TokenType::TK_USING},
    {"VALUES", 6, TokenType::TK_VALUES},
    {"VIEW", 4, TokenType::TK_VIEW},
    {"WHEN", 4, TokenType::TK_WHEN},
    {"WHERE", 5, TokenType::TK_WHERE},
    {"WRITE", 5, TokenType::TK_WRITE},
};
115+
116+
/// Number of entries in KEYWORDS, derived from the array's size so it cannot
/// drift when rows are added or removed.
inline constexpr size_t KEYWORD_COUNT = sizeof KEYWORDS / sizeof *KEYWORDS;
117+
118+
/// Classify a lexeme against the PostgreSQL keyword table.
///
/// Binary-searches KEYWORDS, ordering the lexeme against each candidate with
/// sql_parser::ci_cmp (declared elsewhere; presumably a case-insensitive
/// three-way compare -- confirm against its definition).
///
/// @param text  Pointer to the lexeme's characters; forwarded to ci_cmp.
/// @param len   Length of the lexeme; forwarded to ci_cmp.
/// @return The token of the entry for which ci_cmp returns 0, or
///         TokenType::TK_IDENTIFIER when no entry compares equal.
inline TokenType lookup(const char* text, uint32_t len) {
    // Half-open search window [first, last) over the sorted table.
    size_t first = 0;
    size_t last = KEYWORD_COUNT;
    while (first < last) {
        // Midpoint computed from the window width so first + last is never formed.
        const size_t probe = first + (last - first) / 2;
        const KeywordEntry& candidate = KEYWORDS[probe];
        const int order = sql_parser::ci_cmp(text, len, candidate.text, candidate.len);
        if (order == 0) {
            return candidate.token;
        }
        if (order < 0) {
            last = probe;       // lexeme sorts before the candidate
        } else {
            first = probe + 1;  // lexeme sorts after the candidate
        }
    }
    return TokenType::TK_IDENTIFIER;
}
129+
130+
} // namespace pgsql_keywords
131+
} // namespace sql_parser
132+
133+
#endif // SQL_PARSER_KEYWORDS_PGSQL_H

include/sql_parser/token.h

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#ifndef SQL_PARSER_TOKEN_H
2+
#define SQL_PARSER_TOKEN_H
3+
4+
#include "sql_parser/common.h"
5+
#include <cstdint>
6+
7+
namespace sql_parser {
8+
9+
/// Kinds of token, stored in 16 bits.
///
/// Only TK_EOF has an explicit value; every other enumerator takes the next
/// sequential value, so inserting or reordering enumerators changes the
/// numeric value of everything after the edit.
enum class TokenType : uint16_t {
    // --- Non-keyword tokens -------------------------------------------------
    TK_EOF = 0,     // also the default `type` of a default-constructed Token
    TK_ERROR,
    TK_IDENTIFIER,  // returned by the keyword lookup() functions when nothing matches
    TK_INTEGER,
    TK_FLOAT,
    TK_STRING,
    TK_LPAREN,
    TK_RPAREN,
    TK_COMMA,
    TK_SEMICOLON,
    TK_DOT,
    TK_ASTERISK,
    TK_PLUS,
    TK_MINUS,
    TK_SLASH,
    TK_PERCENT,
    TK_EQUAL,
    TK_NOT_EQUAL,
    TK_LESS,
    TK_GREATER,
    TK_LESS_EQUAL,
    TK_GREATER_EQUAL,
    TK_AMPERSAND,
    TK_PIPE,
    TK_CARET,
    TK_TILDE,
    TK_EXCLAIM,
    TK_COLON,
    TK_QUESTION,
    TK_AT,
    TK_DOUBLE_AT,
    TK_HASH,
    TK_COLON_EQUAL,
    TK_DOUBLE_PIPE,
    TK_DOUBLE_COLON,
    TK_DOLLAR_NUM,

    // --- Keyword tokens (targets of the keyword lookup tables) --------------
    TK_SELECT, TK_INSERT, TK_UPDATE, TK_DELETE, TK_REPLACE,
    TK_FROM, TK_WHERE, TK_SET, TK_INTO, TK_VALUES, TK_AS, TK_ON, TK_USING,
    TK_JOIN, TK_INNER, TK_LEFT, TK_RIGHT, TK_FULL, TK_OUTER, TK_CROSS, TK_NATURAL,
    TK_ORDER, TK_BY, TK_GROUP, TK_HAVING, TK_LIMIT, TK_OFFSET, TK_FETCH,
    TK_ASC, TK_DESC, TK_DISTINCT, TK_ALL,
    TK_AND, TK_OR, TK_NOT, TK_IS, TK_NULL, TK_IN, TK_BETWEEN, TK_LIKE, TK_EXISTS,
    TK_CASE, TK_WHEN, TK_THEN, TK_ELSE, TK_END, TK_TRUE, TK_FALSE,
    TK_NAMES, TK_CHARACTER, TK_CHARSET, TK_COLLATE, TK_GLOBAL, TK_SESSION, TK_LOCAL,
    TK_PERSIST, TK_DEFAULT, TK_TRANSACTION, TK_ISOLATION, TK_LEVEL,
    TK_READ, TK_WRITE, TK_ONLY, TK_COMMITTED, TK_UNCOMMITTED, TK_REPEATABLE,
    TK_SERIALIZABLE, TK_TO,
    TK_CREATE, TK_ALTER, TK_DROP, TK_TRUNCATE, TK_TABLE, TK_INDEX, TK_VIEW,
    TK_DATABASE, TK_SCHEMA, TK_IF,
    TK_BEGIN, TK_START, TK_COMMIT, TK_ROLLBACK, TK_SAVEPOINT,
    TK_USE, TK_SHOW, TK_PREPARE, TK_EXECUTE, TK_DEALLOCATE,
    TK_GRANT, TK_REVOKE, TK_LOCK, TK_UNLOCK, TK_LOAD, TK_DATA,
    TK_FOR, TK_SHARE, TK_NOWAIT, TK_SKIP, TK_LOCKED,
    TK_OUTFILE, TK_DUMPFILE, TK_IGNORE, TK_LOW_PRIORITY, TK_QUICK, TK_RESET,
    TK_SQL_CALC_FOUND_ROWS,
    TK_COUNT, TK_SUM, TK_AVG, TK_MIN, TK_MAX,
};
67+
68+
struct Token {
69+
TokenType type = TokenType::TK_EOF;
70+
StringRef text;
71+
uint32_t offset = 0;
72+
};
73+
74+
} // namespace sql_parser
75+
76+
#endif // SQL_PARSER_TOKEN_H

0 commit comments

Comments
 (0)