Skip to content

Commit 1108cf9

Browse files
committed
feat: add LIKE pattern matcher, Tag<->Kind mapping, fix CASE/WHEN flags
1 parent 377dc89 commit 1108cf9

5 files changed

Lines changed: 317 additions & 1 deletion

File tree

Makefile.new

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ TEST_SRCS = $(TEST_DIR)/test_main.cpp \
4646
$(TEST_DIR)/test_comparison.cpp \
4747
$(TEST_DIR)/test_string_funcs.cpp \
4848
$(TEST_DIR)/test_cast.cpp \
49-
$(TEST_DIR)/test_registry.cpp
49+
$(TEST_DIR)/test_registry.cpp \
50+
$(TEST_DIR)/test_like.cpp
5051
TEST_OBJS = $(TEST_SRCS:.cpp=.o)
5152
TEST_TARGET = $(PROJECT_ROOT)/run_tests
5253

include/sql_engine/like.h

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#ifndef SQL_ENGINE_LIKE_H
2+
#define SQL_ENGINE_LIKE_H
3+
4+
#include "sql_parser/common.h"
5+
#include <cctype>
6+
7+
namespace sql_engine {
8+
9+
using sql_parser::Dialect;
10+
using sql_parser::StringRef;
11+
12+
namespace detail {
13+
14+
// ASCII-only lowercase fold: 'A'..'Z' become 'a'..'z'; every other value is
// returned unchanged.
inline char to_lower(char c) {
    // Distance between an upper-case ASCII letter and its lower-case form.
    const int ascii_case_gap = 32;

    const bool is_ascii_upper = (c >= 'A') && (c <= 'Z');
    if (!is_ascii_upper) {
        return c;
    }
    return static_cast<char>(c + ascii_case_gap);
}
17+
18+
} // namespace detail
19+
20+
// Match a string against a SQL LIKE pattern.
21+
//
22+
// Template parameter D controls case sensitivity:
23+
// MySQL: case-insensitive by default
24+
// PostgreSQL: case-sensitive (use ILIKE for insensitive, not handled here)
25+
//
26+
// Pattern characters:
27+
// % -- matches zero or more characters
28+
// _ -- matches exactly one character
29+
// escape_char -- next character is literal (default '\')
30+
//
31+
// Algorithm: iterative with backtracking via saved positions for '%'.
32+
// O(n*m) worst case, O(n+m) typical.
33+
template <Dialect D>
34+
bool match_like(StringRef text, StringRef pattern, char escape_char = '\\') {
35+
constexpr bool case_insensitive = (D == Dialect::MySQL);
36+
37+
uint32_t ti = 0; // text index
38+
uint32_t pi = 0; // pattern index
39+
40+
// Saved positions for '%' backtracking
41+
uint32_t star_pi = UINT32_MAX; // pattern position after last '%'
42+
uint32_t star_ti = UINT32_MAX; // text position when last '%' was hit
43+
44+
while (ti < text.len) {
45+
if (pi < pattern.len) {
46+
char pc = pattern.ptr[pi];
47+
48+
// Check escape character
49+
if (pc == escape_char && pi + 1 < pattern.len) {
50+
// Next character is literal
51+
pi++;
52+
pc = pattern.ptr[pi];
53+
char tc = text.ptr[ti];
54+
if (case_insensitive) {
55+
tc = detail::to_lower(tc);
56+
pc = detail::to_lower(pc);
57+
}
58+
if (tc == pc) {
59+
ti++;
60+
pi++;
61+
continue;
62+
}
63+
// Fall through to backtrack
64+
} else if (pc == '%') {
65+
// Save backtrack position
66+
star_pi = pi + 1;
67+
star_ti = ti;
68+
pi++;
69+
continue;
70+
} else if (pc == '_') {
71+
// Match exactly one character
72+
ti++;
73+
pi++;
74+
continue;
75+
} else {
76+
// Literal character match
77+
char tc = text.ptr[ti];
78+
if (case_insensitive) {
79+
tc = detail::to_lower(tc);
80+
pc = detail::to_lower(pc);
81+
}
82+
if (tc == pc) {
83+
ti++;
84+
pi++;
85+
continue;
86+
}
87+
// Fall through to backtrack
88+
}
89+
}
90+
91+
// Mismatch or pattern exhausted: try backtracking to last '%'
92+
if (star_pi != UINT32_MAX) {
93+
pi = star_pi;
94+
star_ti++;
95+
ti = star_ti;
96+
continue;
97+
}
98+
99+
// No '%' to backtrack to: match fails
100+
return false;
101+
}
102+
103+
// Text consumed: skip any remaining '%' in pattern
104+
while (pi < pattern.len && pattern.ptr[pi] == '%') {
105+
pi++;
106+
}
107+
108+
return pi == pattern.len;
109+
}
110+
111+
} // namespace sql_engine
112+
113+
#endif // SQL_ENGINE_LIKE_H

include/sql_engine/tag_kind_map.h

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#ifndef SQL_ENGINE_TAG_KIND_MAP_H
2+
#define SQL_ENGINE_TAG_KIND_MAP_H
3+
4+
#include "sql_engine/types.h"
5+
#include "sql_engine/value.h"
6+
7+
namespace sql_engine {
8+
9+
// Convert a runtime Value::Tag to the corresponding SqlType::Kind.
10+
// Used before calling CoercionRules<D>::common_type().
11+
inline SqlType::Kind tag_to_kind(Value::Tag tag) {
12+
switch (tag) {
13+
case Value::TAG_NULL: return SqlType::NULL_TYPE;
14+
case Value::TAG_BOOL: return SqlType::BOOLEAN;
15+
case Value::TAG_INT64: return SqlType::BIGINT;
16+
case Value::TAG_UINT64: return SqlType::BIGINT;
17+
case Value::TAG_DOUBLE: return SqlType::DOUBLE;
18+
case Value::TAG_DECIMAL: return SqlType::DECIMAL;
19+
case Value::TAG_STRING: return SqlType::VARCHAR;
20+
case Value::TAG_BYTES: return SqlType::VARBINARY;
21+
case Value::TAG_DATE: return SqlType::DATE;
22+
case Value::TAG_TIME: return SqlType::TIME;
23+
case Value::TAG_DATETIME: return SqlType::DATETIME;
24+
case Value::TAG_TIMESTAMP: return SqlType::TIMESTAMP;
25+
case Value::TAG_INTERVAL: return SqlType::INTERVAL;
26+
case Value::TAG_JSON: return SqlType::JSON;
27+
default: return SqlType::UNKNOWN;
28+
}
29+
}
30+
31+
// Convert a SqlType::Kind back to a Value::Tag for coercion targets.
32+
// Used after common_type() returns the promotion target.
33+
inline Value::Tag kind_to_tag(SqlType::Kind kind) {
34+
switch (kind) {
35+
case SqlType::BOOLEAN: return Value::TAG_BOOL;
36+
case SqlType::TINYINT:
37+
case SqlType::SMALLINT:
38+
case SqlType::MEDIUMINT:
39+
case SqlType::INT:
40+
case SqlType::BIGINT: return Value::TAG_INT64;
41+
case SqlType::FLOAT:
42+
case SqlType::DOUBLE: return Value::TAG_DOUBLE;
43+
case SqlType::DECIMAL: return Value::TAG_DECIMAL;
44+
case SqlType::CHAR:
45+
case SqlType::VARCHAR:
46+
case SqlType::TEXT:
47+
case SqlType::MEDIUMTEXT:
48+
case SqlType::LONGTEXT: return Value::TAG_STRING;
49+
case SqlType::BINARY:
50+
case SqlType::VARBINARY:
51+
case SqlType::BLOB: return Value::TAG_BYTES;
52+
case SqlType::DATE: return Value::TAG_DATE;
53+
case SqlType::TIME: return Value::TAG_TIME;
54+
case SqlType::DATETIME: return Value::TAG_DATETIME;
55+
case SqlType::TIMESTAMP: return Value::TAG_TIMESTAMP;
56+
case SqlType::INTERVAL: return Value::TAG_INTERVAL;
57+
case SqlType::JSON:
58+
case SqlType::JSONB: return Value::TAG_JSON;
59+
default: return Value::TAG_NULL;
60+
}
61+
}
62+
63+
} // namespace sql_engine
64+
65+
#endif // SQL_ENGINE_TAG_KIND_MAP_H

include/sql_parser/expression_parser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,7 @@ class ExpressionParser {
412412
AstNode* node = make_node(arena_, NodeType::NODE_CASE_WHEN);
413413
// Optional simple CASE expression: CASE expr WHEN ...
414414
if (tok_.peek().type != TokenType::TK_WHEN) {
415+
node->flags = 1; // simple CASE (has case_expr)
415416
AstNode* case_expr = parse();
416417
if (case_expr) node->add_child(case_expr);
417418
}

tests/test_like.cpp

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#include <gtest/gtest.h>
2+
#include "sql_engine/like.h"
3+
4+
using namespace sql_engine;
5+
using sql_parser::Dialect;
6+
using sql_parser::StringRef;
7+
8+
// Helper to make StringRef from string literal
9+
static StringRef S(const char* s) {
10+
return StringRef{s, static_cast<uint32_t>(std::strlen(s))};
11+
}
12+
13+
// ===== MySQL (case-insensitive) =====
14+
15+
class LikeMySQLTest : public ::testing::Test {};
16+
17+
TEST_F(LikeMySQLTest, ExactMatch) {
18+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("hello")));
19+
}
20+
21+
TEST_F(LikeMySQLTest, ExactMatchCaseInsensitive) {
22+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("Hello"), S("hello")));
23+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("HELLO"), S("hello")));
24+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("HELLO")));
25+
}
26+
27+
TEST_F(LikeMySQLTest, NoMatch) {
28+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("hello"), S("world")));
29+
}
30+
31+
TEST_F(LikeMySQLTest, PercentPrefix) {
32+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("%llo")));
33+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("%hello")));
34+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("%o")));
35+
}
36+
37+
TEST_F(LikeMySQLTest, PercentSuffix) {
38+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("hel%")));
39+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("hello%")));
40+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("h%")));
41+
}
42+
43+
TEST_F(LikeMySQLTest, PercentBoth) {
44+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("%ell%")));
45+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("%hello%")));
46+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("%%")));
47+
}
48+
49+
TEST_F(LikeMySQLTest, PercentOnly) {
50+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("anything"), S("%")));
51+
EXPECT_TRUE(match_like<Dialect::MySQL>(S(""), S("%")));
52+
}
53+
54+
TEST_F(LikeMySQLTest, Underscore) {
55+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("hell_")));
56+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("_ello")));
57+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("_____")));
58+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("hello"), S("____")));
59+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("hello"), S("______")));
60+
}
61+
62+
TEST_F(LikeMySQLTest, UnderscoreAndPercent) {
63+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("_ell%")));
64+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello"), S("%ll_")));
65+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("hello world"), S("hello_world")));
66+
}
67+
68+
TEST_F(LikeMySQLTest, EscapeCharacter) {
69+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("100%"), S("100\\%")));
70+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("100x"), S("100\\%")));
71+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("a_b"), S("a\\_b")));
72+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("axb"), S("a\\_b")));
73+
}
74+
75+
TEST_F(LikeMySQLTest, CustomEscapeCharacter) {
76+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("100%"), S("100#%"), '#'));
77+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("100x"), S("100#%"), '#'));
78+
}
79+
80+
TEST_F(LikeMySQLTest, EmptyString) {
81+
EXPECT_TRUE(match_like<Dialect::MySQL>(S(""), S("")));
82+
EXPECT_TRUE(match_like<Dialect::MySQL>(S(""), S("%")));
83+
EXPECT_FALSE(match_like<Dialect::MySQL>(S(""), S("_")));
84+
EXPECT_FALSE(match_like<Dialect::MySQL>(S(""), S("a")));
85+
}
86+
87+
TEST_F(LikeMySQLTest, EmptyPattern) {
88+
EXPECT_TRUE(match_like<Dialect::MySQL>(S(""), S("")));
89+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("hello"), S("")));
90+
}
91+
92+
TEST_F(LikeMySQLTest, MultiplePercents) {
93+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("abcdef"), S("%b%d%f")));
94+
EXPECT_TRUE(match_like<Dialect::MySQL>(S("abcdef"), S("%b%e%")));
95+
EXPECT_FALSE(match_like<Dialect::MySQL>(S("abcdef"), S("%z%")));
96+
}
97+
98+
// ===== PostgreSQL (case-sensitive) =====
99+
100+
class LikePgSQLTest : public ::testing::Test {};
101+
102+
TEST_F(LikePgSQLTest, ExactMatch) {
103+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S("hello"), S("hello")));
104+
}
105+
106+
TEST_F(LikePgSQLTest, CaseSensitive) {
107+
EXPECT_FALSE(match_like<Dialect::PostgreSQL>(S("Hello"), S("hello")));
108+
EXPECT_FALSE(match_like<Dialect::PostgreSQL>(S("HELLO"), S("hello")));
109+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S("hello"), S("hello")));
110+
}
111+
112+
TEST_F(LikePgSQLTest, PercentPrefix) {
113+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S("hello"), S("%llo")));
114+
EXPECT_FALSE(match_like<Dialect::PostgreSQL>(S("hello"), S("%LLO")));
115+
}
116+
117+
TEST_F(LikePgSQLTest, PercentSuffix) {
118+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S("hello"), S("hel%")));
119+
EXPECT_FALSE(match_like<Dialect::PostgreSQL>(S("hello"), S("HEL%")));
120+
}
121+
122+
TEST_F(LikePgSQLTest, Underscore) {
123+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S("hello"), S("hell_")));
124+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S("hello"), S("_ello")));
125+
}
126+
127+
TEST_F(LikePgSQLTest, EscapeCharacter) {
128+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S("100%"), S("100\\%")));
129+
EXPECT_FALSE(match_like<Dialect::PostgreSQL>(S("100x"), S("100\\%")));
130+
}
131+
132+
TEST_F(LikePgSQLTest, EmptyStringEdgeCases) {
133+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S(""), S("")));
134+
EXPECT_TRUE(match_like<Dialect::PostgreSQL>(S(""), S("%")));
135+
EXPECT_FALSE(match_like<Dialect::PostgreSQL>(S(""), S("_")));
136+
}

0 commit comments

Comments
 (0)