Skip to content

Commit 4e9a633

Browse files
committed
feat: add sqlengine CLI tool for interactive SQL execution
Adds tools/sqlengine.cpp — a standalone CLI that reads SQL from stdin and executes through the full pipeline (parse -> plan -> optimize -> execute). Supports in-memory mode for expression evaluation and backend mode for distributed queries. MySQL-style table output with timing. Also adds engine benchmarks (bench_engine.cpp) covering expression evaluation, plan building, full pipeline, filter, join, sort, and aggregate operators.
1 parent 6be0b04 commit 4e9a633

3 files changed

Lines changed: 868 additions & 5 deletions

File tree

Makefile.new

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -88,18 +88,24 @@ GBENCH_OBJS = $(GBENCH_SRCS:.cc=.o)
8888
GBENCH_CPPFLAGS = -I$(GBENCH_DIR)/include -I$(GBENCH_DIR)/src -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK
8989

9090
BENCH_DIR = $(PROJECT_ROOT)/bench
91-
BENCH_SRCS = $(BENCH_DIR)/bench_main.cpp $(BENCH_DIR)/bench_parser.cpp
91+
BENCH_SRCS = $(BENCH_DIR)/bench_main.cpp $(BENCH_DIR)/bench_parser.cpp $(BENCH_DIR)/bench_engine.cpp
9292
BENCH_OBJS = $(BENCH_SRCS:.cpp=.o)
9393
BENCH_TARGET = $(PROJECT_ROOT)/run_bench
9494

9595
# Corpus test
9696
CORPUS_TEST_SRC = $(TEST_DIR)/corpus_test.cpp
9797
CORPUS_TEST_TARGET = $(PROJECT_ROOT)/corpus_test
9898

99-
.PHONY: all lib test bench bench-compare build-corpus-test clean
99+
# SQL Engine CLI tool
100+
SQLENGINE_SRC = $(PROJECT_ROOT)/tools/sqlengine.cpp
101+
SQLENGINE_TARGET = sqlengine
102+
103+
.PHONY: all lib test bench bench-compare build-corpus-test build-sqlengine clean
100104

101105
build-corpus-test: $(CORPUS_TEST_TARGET)
102106

107+
build-sqlengine: $(SQLENGINE_TARGET)
108+
103109
all: lib test
104110

105111
lib: $(LIB_TARGET)
@@ -134,13 +140,17 @@ $(GBENCH_DIR)/src/%.o: $(GBENCH_DIR)/src/%.cc
134140
$(CXX) $(CXXFLAGS) $(GBENCH_CPPFLAGS) -c $< -o $@
135141

136142
$(BENCH_DIR)/%.o: $(BENCH_DIR)/%.cpp
137-
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(GBENCH_CPPFLAGS) -c $< -o $@
143+
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(GBENCH_CPPFLAGS) $(MYSQL_CFLAGS) $(PG_CFLAGS) -c $< -o $@
138144

139145
bench: $(BENCH_TARGET)
140146
./$(BENCH_TARGET) --benchmark_format=console
141147

142-
$(BENCH_TARGET): $(BENCH_OBJS) $(GBENCH_OBJS) $(LIB_TARGET)
143-
$(CXX) $(CXXFLAGS) -o $@ $(BENCH_OBJS) $(GBENCH_OBJS) -L$(PROJECT_ROOT) -lsqlparser -lpthread
148+
$(BENCH_TARGET): $(BENCH_OBJS) $(GBENCH_OBJS) $(LIB_TARGET) $(ENGINE_OBJS)
149+
$(CXX) $(CXXFLAGS) -o $@ $(BENCH_OBJS) $(GBENCH_OBJS) $(ENGINE_OBJS) -L$(PROJECT_ROOT) -lsqlparser -lpthread $(MYSQL_LIBS) $(PG_LIBS)
150+
151+
# SQL Engine CLI tool
152+
$(SQLENGINE_TARGET): $(SQLENGINE_SRC) $(LIB_TARGET) $(ENGINE_OBJS)
153+
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(MYSQL_CFLAGS) $(PG_CFLAGS) -o $@ $< $(ENGINE_OBJS) -L$(PROJECT_ROOT) -lsqlparser -lpthread $(MYSQL_LIBS) $(PG_LIBS)
144154

145155
$(CORPUS_TEST_TARGET): $(CORPUS_TEST_SRC) $(LIB_TARGET)
146156
$(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $@ $< -L$(PROJECT_ROOT) -lsqlparser
@@ -168,4 +178,5 @@ clean:
168178
rm -f $(ENGINE_OBJS)
169179
rm -f $(BENCH_OBJS) $(GBENCH_OBJS) $(BENCH_TARGET) $(CORPUS_TEST_TARGET)
170180
rm -f $(BENCH_COMPARE_OBJ) $(BENCH_COMPARE_TARGET)
181+
rm -f $(SQLENGINE_TARGET)
171182
@echo "Cleaned."

bench/bench_engine.cpp

Lines changed: 342 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,342 @@
1+
// bench_engine.cpp — Engine execution benchmarks
2+
//
3+
// Benchmarks the query engine pipeline: expression evaluation, plan building,
4+
// full pipeline execution, filter/join/sort/aggregate operators.
5+
6+
#include <benchmark/benchmark.h>
7+
8+
#include "sql_parser/parser.h"
9+
#include "sql_parser/common.h"
10+
#include "sql_engine/session.h"
11+
#include "sql_engine/in_memory_catalog.h"
12+
#include "sql_engine/data_source.h"
13+
#include "sql_engine/local_txn.h"
14+
#include "sql_engine/expression_eval.h"
15+
#include "sql_engine/function_registry.h"
16+
#include "sql_engine/plan_builder.h"
17+
#include "sql_engine/plan_executor.h"
18+
#include "sql_engine/optimizer.h"
19+
#include "sql_engine/catalog_resolver.h"
20+
#include "sql_engine/value.h"
21+
#include "sql_engine/row.h"
22+
23+
#include <vector>
24+
#include <string>
25+
#include <cstring>
26+
27+
using namespace sql_parser;
28+
using namespace sql_engine;
29+
30+
// ========== Helpers ==========
31+
32+
// Build a catalog with a "users" table: id INT, name VARCHAR, age INT, score DOUBLE
33+
static InMemoryCatalog make_users_catalog() {
34+
InMemoryCatalog catalog;
35+
catalog.add_table("", "users", {
36+
{"id", SqlType::make_int(), false},
37+
{"name", SqlType::make_varchar(255), true},
38+
{"age", SqlType::make_int(), true},
39+
{"score", SqlType{SqlType::DOUBLE, 0, 0, false, false}, true},
40+
});
41+
return catalog;
42+
}
43+
44+
// Build a catalog with "orders" table: id INT, user_id INT, total DOUBLE
45+
static void add_orders_table(InMemoryCatalog& catalog) {
46+
catalog.add_table("", "orders", {
47+
{"id", SqlType::make_int(), false},
48+
{"user_id", SqlType::make_int(), true},
49+
{"total", SqlType{SqlType::DOUBLE, 0, 0, false, false}, true},
50+
});
51+
}
52+
53+
// Build N rows for users table
54+
static std::vector<Row> make_user_rows(Arena& arena, int n) {
55+
std::vector<Row> rows;
56+
rows.reserve(n);
57+
for (int i = 0; i < n; ++i) {
58+
Row r = make_row(arena, 4);
59+
r.set(0, value_int(static_cast<int64_t>(i + 1)));
60+
61+
char buf[32];
62+
int len = snprintf(buf, sizeof(buf), "user_%d", i);
63+
char* name = static_cast<char*>(arena.allocate(static_cast<uint32_t>(len)));
64+
std::memcpy(name, buf, static_cast<size_t>(len));
65+
r.set(1, value_string(StringRef{name, static_cast<uint32_t>(len)}));
66+
67+
r.set(2, value_int(static_cast<int64_t>(18 + (i % 50))));
68+
r.set(3, value_double(static_cast<double>(50 + (i % 100))));
69+
rows.push_back(r);
70+
}
71+
return rows;
72+
}
73+
74+
// Build N rows for orders table
75+
static std::vector<Row> make_order_rows(Arena& arena, int n, int max_user_id) {
76+
std::vector<Row> rows;
77+
rows.reserve(n);
78+
for (int i = 0; i < n; ++i) {
79+
Row r = make_row(arena, 3);
80+
r.set(0, value_int(static_cast<int64_t>(i + 1)));
81+
r.set(1, value_int(static_cast<int64_t>((i % max_user_id) + 1)));
82+
r.set(2, value_double(static_cast<double>(10 + (i % 200))));
83+
rows.push_back(r);
84+
}
85+
return rows;
86+
}
87+
88+
// ========== Expression Evaluation Benchmarks ==========
89+
90+
// Simple: 1 + 2
91+
static void BM_Expr_SimpleArith(benchmark::State& state) {
92+
Parser<Dialect::MySQL> parser;
93+
FunctionRegistry<Dialect::MySQL> funcs;
94+
funcs.register_builtins();
95+
Arena arena{65536, 1048576};
96+
97+
const char* sql = "SELECT 1 + 2";
98+
size_t len = std::strlen(sql);
99+
100+
auto pr = parser.parse(sql, len);
101+
// The expression is in the SELECT list: pr.ast->first_child
102+
AstNode* expr = pr.ast ? pr.ast->first_child : nullptr;
103+
104+
auto resolve = [](const StringRef&) -> Value {
105+
return value_null();
106+
};
107+
108+
for (auto _ : state) {
109+
Value v = evaluate_expression<Dialect::MySQL>(expr, resolve, funcs, arena);
110+
benchmark::DoNotOptimize(v);
111+
}
112+
}
113+
BENCHMARK(BM_Expr_SimpleArith);
114+
115+
// Complex: price * qty > 100
116+
static void BM_Expr_ComplexCompare(benchmark::State& state) {
117+
InMemoryCatalog catalog;
118+
catalog.add_table("", "items", {
119+
{"price", SqlType{SqlType::DOUBLE, 0, 0, false, false}, false},
120+
{"qty", SqlType::make_int(), false},
121+
});
122+
123+
Parser<Dialect::MySQL> parser;
124+
FunctionRegistry<Dialect::MySQL> funcs;
125+
funcs.register_builtins();
126+
Arena arena{65536, 1048576};
127+
128+
// Parse: SELECT price * qty > 100 FROM items
129+
const char* sql = "SELECT price * qty > 100 FROM items";
130+
size_t len = std::strlen(sql);
131+
auto pr = parser.parse(sql, len);
132+
AstNode* expr = pr.ast ? pr.ast->first_child : nullptr;
133+
134+
// Build a row with price=42.5, qty=3
135+
Row row = make_row(arena, 2);
136+
row.set(0, value_double(42.5));
137+
row.set(1, value_int(3));
138+
139+
const TableInfo* ti = catalog.get_table(StringRef{"items", 5});
140+
auto resolve = make_resolver(catalog, ti, row.values);
141+
142+
for (auto _ : state) {
143+
Value v = evaluate_expression<Dialect::MySQL>(expr, resolve, funcs, arena);
144+
benchmark::DoNotOptimize(v);
145+
}
146+
}
147+
BENCHMARK(BM_Expr_ComplexCompare);
148+
149+
// ========== Plan Building Benchmarks ==========
150+
151+
// Simple SELECT plan build
152+
static void BM_PlanBuild_Simple(benchmark::State& state) {
153+
InMemoryCatalog catalog = make_users_catalog();
154+
Parser<Dialect::MySQL> parser;
155+
156+
const char* sql = "SELECT id, name FROM users WHERE age > 21";
157+
size_t len = std::strlen(sql);
158+
159+
for (auto _ : state) {
160+
parser.reset();
161+
auto pr = parser.parse(sql, len);
162+
PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena());
163+
PlanNode* plan = builder.build(pr.ast);
164+
benchmark::DoNotOptimize(plan);
165+
}
166+
}
167+
BENCHMARK(BM_PlanBuild_Simple);
168+
169+
// Complex SELECT plan build (JOIN + GROUP BY + HAVING + ORDER BY + LIMIT)
170+
static void BM_PlanBuild_Complex(benchmark::State& state) {
171+
InMemoryCatalog catalog = make_users_catalog();
172+
add_orders_table(catalog);
173+
Parser<Dialect::MySQL> parser;
174+
175+
const char* sql =
176+
"SELECT u.id, u.name, COUNT(o.id) AS cnt FROM users u "
177+
"JOIN orders o ON u.id = o.user_id "
178+
"WHERE u.age > 18 GROUP BY u.id, u.name "
179+
"HAVING COUNT(o.id) > 2 ORDER BY cnt DESC LIMIT 10";
180+
size_t len = std::strlen(sql);
181+
182+
for (auto _ : state) {
183+
parser.reset();
184+
auto pr = parser.parse(sql, len);
185+
PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena());
186+
PlanNode* plan = builder.build(pr.ast);
187+
benchmark::DoNotOptimize(plan);
188+
}
189+
}
190+
BENCHMARK(BM_PlanBuild_Complex);
191+
192+
// ========== Full Pipeline Benchmarks ==========
193+
194+
// Parse -> plan -> optimize -> execute (simple SELECT, 100 rows)
195+
static void BM_Pipeline_Simple(benchmark::State& state) {
196+
InMemoryCatalog catalog = make_users_catalog();
197+
Arena data_arena{65536, 1048576};
198+
auto rows = make_user_rows(data_arena, 100);
199+
const TableInfo* ti = catalog.get_table(StringRef{"users", 5});
200+
InMemoryDataSource source(ti, std::move(rows));
201+
202+
FunctionRegistry<Dialect::MySQL> funcs;
203+
funcs.register_builtins();
204+
205+
const char* sql = "SELECT id, name FROM users WHERE age > 30";
206+
size_t len = std::strlen(sql);
207+
208+
for (auto _ : state) {
209+
Parser<Dialect::MySQL> parser;
210+
auto pr = parser.parse(sql, len);
211+
PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena());
212+
PlanNode* plan = builder.build(pr.ast);
213+
Optimizer<Dialect::MySQL> optimizer(catalog, funcs);
214+
plan = optimizer.optimize(plan, parser.arena());
215+
PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena());
216+
executor.add_data_source("users", &source);
217+
ResultSet rs = executor.execute(plan);
218+
benchmark::DoNotOptimize(rs.rows.size());
219+
}
220+
}
221+
BENCHMARK(BM_Pipeline_Simple);
222+
223+
// ========== Operator Benchmarks ==========
224+
225+
// Filter: scan 1000 rows, keep the 140 rows with age > 60
226+
static void BM_Op_Filter1000(benchmark::State& state) {
227+
InMemoryCatalog catalog = make_users_catalog();
228+
Arena data_arena{262144, 1048576};
229+
auto rows = make_user_rows(data_arena, 1000);
230+
const TableInfo* ti = catalog.get_table(StringRef{"users", 5});
231+
InMemoryDataSource source(ti, std::move(rows));
232+
233+
FunctionRegistry<Dialect::MySQL> funcs;
234+
funcs.register_builtins();
235+
236+
// age > 60 filters to ~7/50 * 1000 = ~140 rows
237+
const char* sql = "SELECT id, name FROM users WHERE age > 60";
238+
size_t len = std::strlen(sql);
239+
240+
for (auto _ : state) {
241+
Parser<Dialect::MySQL> parser;
242+
auto pr = parser.parse(sql, len);
243+
PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena());
244+
PlanNode* plan = builder.build(pr.ast);
245+
PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena());
246+
executor.add_data_source("users", &source);
247+
ResultSet rs = executor.execute(plan);
248+
benchmark::DoNotOptimize(rs.rows.size());
249+
}
250+
}
251+
BENCHMARK(BM_Op_Filter1000);
252+
253+
// Join: 100-row users table with 1000-row orders table via nested-loop
254+
static void BM_Op_Join1000(benchmark::State& state) {
255+
InMemoryCatalog catalog = make_users_catalog();
256+
add_orders_table(catalog);
257+
Arena data_arena{262144, 4194304};
258+
auto user_rows = make_user_rows(data_arena, 100);
259+
auto order_rows = make_order_rows(data_arena, 1000, 100);
260+
261+
const TableInfo* uti = catalog.get_table(StringRef{"users", 5});
262+
const TableInfo* oti = catalog.get_table(StringRef{"orders", 6});
263+
InMemoryDataSource user_src(uti, std::move(user_rows));
264+
InMemoryDataSource order_src(oti, std::move(order_rows));
265+
266+
FunctionRegistry<Dialect::MySQL> funcs;
267+
funcs.register_builtins();
268+
269+
const char* sql =
270+
"SELECT u.id, o.total FROM users u "
271+
"JOIN orders o ON u.id = o.user_id";
272+
size_t len = std::strlen(sql);
273+
274+
for (auto _ : state) {
275+
Parser<Dialect::MySQL> parser;
276+
auto pr = parser.parse(sql, len);
277+
PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena());
278+
PlanNode* plan = builder.build(pr.ast);
279+
PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena());
280+
executor.add_data_source("users", &user_src);
281+
executor.add_data_source("orders", &order_src);
282+
ResultSet rs = executor.execute(plan);
283+
benchmark::DoNotOptimize(rs.rows.size());
284+
}
285+
}
286+
BENCHMARK(BM_Op_Join1000);
287+
288+
// Sort: 1000 rows, 1 key
289+
static void BM_Op_Sort1000(benchmark::State& state) {
290+
InMemoryCatalog catalog = make_users_catalog();
291+
Arena data_arena{262144, 1048576};
292+
auto rows = make_user_rows(data_arena, 1000);
293+
const TableInfo* ti = catalog.get_table(StringRef{"users", 5});
294+
InMemoryDataSource source(ti, std::move(rows));
295+
296+
FunctionRegistry<Dialect::MySQL> funcs;
297+
funcs.register_builtins();
298+
299+
const char* sql = "SELECT id, name, age FROM users ORDER BY age DESC";
300+
size_t len = std::strlen(sql);
301+
302+
for (auto _ : state) {
303+
Parser<Dialect::MySQL> parser;
304+
auto pr = parser.parse(sql, len);
305+
PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena());
306+
PlanNode* plan = builder.build(pr.ast);
307+
PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena());
308+
executor.add_data_source("users", &source);
309+
ResultSet rs = executor.execute(plan);
310+
benchmark::DoNotOptimize(rs.rows.size());
311+
}
312+
}
313+
BENCHMARK(BM_Op_Sort1000);
314+
315+
// Aggregate: 1000 rows, 50 groups (GROUP BY age; ages span 18..67)
316+
static void BM_Op_Aggregate1000(benchmark::State& state) {
317+
InMemoryCatalog catalog = make_users_catalog();
318+
Arena data_arena{262144, 1048576};
319+
auto rows = make_user_rows(data_arena, 1000);
320+
const TableInfo* ti = catalog.get_table(StringRef{"users", 5});
321+
InMemoryDataSource source(ti, std::move(rows));
322+
323+
FunctionRegistry<Dialect::MySQL> funcs;
324+
funcs.register_builtins();
325+
326+
// age has 50 distinct values (18..67), so GROUP BY age gives ~50 groups
327+
const char* sql =
328+
"SELECT age, COUNT(id), AVG(score) FROM users GROUP BY age";
329+
size_t len = std::strlen(sql);
330+
331+
for (auto _ : state) {
332+
Parser<Dialect::MySQL> parser;
333+
auto pr = parser.parse(sql, len);
334+
PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena());
335+
PlanNode* plan = builder.build(pr.ast);
336+
PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena());
337+
executor.add_data_source("users", &source);
338+
ResultSet rs = executor.execute(plan);
339+
benchmark::DoNotOptimize(rs.rows.size());
340+
}
341+
}
342+
BENCHMARK(BM_Op_Aggregate1000);

0 commit comments

Comments
 (0)