Skip to content

Commit b7126b9

Browse files
committed
feat: add Google Benchmark performance tests for parser operations
1 parent 8c4d7de commit b7126b9

186 files changed

Lines changed: 27339 additions & 1 deletion

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,4 @@ src/*_parser/*_parser.report
4545
# New parser build artifacts
4646
libsqlparser.a
4747
run_tests
48+
run_bench

Makefile.new

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,18 @@ TEST_SRCS = $(TEST_DIR)/test_main.cpp \
3131
TEST_OBJS = $(TEST_SRCS:.cpp=.o)
3232
TEST_TARGET = $(PROJECT_ROOT)/run_tests
3333

34-
.PHONY: all lib test clean
34+
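# Google Benchmark (vendored under third_party/benchmark). benchmark_main.cc is
# filtered out because bench/bench_main.cpp supplies main() via BENCHMARK_MAIN().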
35+
GBENCH_DIR = $(PROJECT_ROOT)/third_party/benchmark
36+
GBENCH_SRCS = $(filter-out $(GBENCH_DIR)/src/benchmark_main.cc, $(wildcard $(GBENCH_DIR)/src/*.cc))
37+
GBENCH_OBJS = $(GBENCH_SRCS:.cc=.o)
38+
GBENCH_CPPFLAGS = -I$(GBENCH_DIR)/include -I$(GBENCH_DIR)/src -DHAVE_STD_REGEX -DHAVE_STEADY_CLOCK
39+
40+
BENCH_DIR = $(PROJECT_ROOT)/bench
41+
BENCH_SRCS = $(BENCH_DIR)/bench_main.cpp $(BENCH_DIR)/bench_parser.cpp
42+
BENCH_OBJS = $(BENCH_SRCS:.cpp=.o)
43+
BENCH_TARGET = $(PROJECT_ROOT)/run_bench
44+
45+
.PHONY: all lib test bench clean
3546

3647
all: lib test
3748

@@ -58,6 +69,20 @@ test: $(TEST_TARGET)
5869
$(TEST_TARGET): $(TEST_OBJS) $(GTEST_OBJ) $(LIB_TARGET)
5970
$(CXX) $(CXXFLAGS) -o $@ $(TEST_OBJS) $(GTEST_OBJ) -L$(PROJECT_ROOT) -lsqlparser -lpthread
6071

72+
# Benchmark objects: the vendored Google Benchmark sources and the bench/*.cpp drivers
73+
$(GBENCH_DIR)/src/%.o: $(GBENCH_DIR)/src/%.cc
74+
$(CXX) $(CXXFLAGS) $(GBENCH_CPPFLAGS) -c $< -o $@
75+
76+
$(BENCH_DIR)/%.o: $(BENCH_DIR)/%.cpp
77+
$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(GBENCH_CPPFLAGS) -c $< -o $@
78+
79+
bench: $(BENCH_TARGET)
80+
./$(BENCH_TARGET) --benchmark_format=console
81+
82+
$(BENCH_TARGET): $(BENCH_OBJS) $(GBENCH_OBJS) $(LIB_TARGET)
83+
$(CXX) $(CXXFLAGS) -o $@ $(BENCH_OBJS) $(GBENCH_OBJS) -L$(PROJECT_ROOT) -lsqlparser -lpthread
84+
6185
clean:
6286
rm -f $(LIB_OBJS) $(LIB_TARGET) $(TEST_OBJS) $(GTEST_OBJ) $(TEST_TARGET)
87+
rm -f $(BENCH_OBJS) $(GBENCH_OBJS) $(BENCH_TARGET)
6388
@echo "Cleaned."

bench/bench_main.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#include <benchmark/benchmark.h>
2+
3+
// Expands to the program's main(), which runs the benchmarks registered via BENCHMARK().
BENCHMARK_MAIN();

bench/bench_parser.cpp

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
#include <cstddef>
#include <cstring>

#include <benchmark/benchmark.h>

#include "sql_parser/parser.h"
#include "sql_parser/emitter.h"
4+
5+
using namespace sql_parser;
6+
7+
// ========== Tier 2: Classification ==========
8+
// Target: <100ns
9+
10+
static void BM_Classify_Insert(benchmark::State& state) {
11+
Parser<Dialect::MySQL> parser;
12+
const char* sql = "INSERT INTO users VALUES (1, 'name', 'email')";
13+
size_t len = strlen(sql);
14+
for (auto _ : state) {
15+
auto r = parser.parse(sql, len);
16+
benchmark::DoNotOptimize(r.stmt_type);
17+
}
18+
}
19+
BENCHMARK(BM_Classify_Insert);
20+
21+
static void BM_Classify_Update(benchmark::State& state) {
22+
Parser<Dialect::MySQL> parser;
23+
const char* sql = "UPDATE users SET name = 'x' WHERE id = 1";
24+
size_t len = strlen(sql);
25+
for (auto _ : state) {
26+
auto r = parser.parse(sql, len);
27+
benchmark::DoNotOptimize(r.stmt_type);
28+
}
29+
}
30+
BENCHMARK(BM_Classify_Update);
31+
32+
static void BM_Classify_Delete(benchmark::State& state) {
33+
Parser<Dialect::MySQL> parser;
34+
const char* sql = "DELETE FROM users WHERE id = 1";
35+
size_t len = strlen(sql);
36+
for (auto _ : state) {
37+
auto r = parser.parse(sql, len);
38+
benchmark::DoNotOptimize(r.stmt_type);
39+
}
40+
}
41+
BENCHMARK(BM_Classify_Delete);
42+
43+
static void BM_Classify_Begin(benchmark::State& state) {
44+
Parser<Dialect::MySQL> parser;
45+
const char* sql = "BEGIN";
46+
size_t len = strlen(sql);
47+
for (auto _ : state) {
48+
auto r = parser.parse(sql, len);
49+
benchmark::DoNotOptimize(r.stmt_type);
50+
}
51+
}
52+
BENCHMARK(BM_Classify_Begin);
53+
54+
// ========== Tier 1: SET parse ==========
55+
// Target: <300ns
56+
57+
static void BM_Set_Simple(benchmark::State& state) {
58+
Parser<Dialect::MySQL> parser;
59+
const char* sql = "SET @@session.wait_timeout = 600";
60+
size_t len = strlen(sql);
61+
for (auto _ : state) {
62+
auto r = parser.parse(sql, len);
63+
benchmark::DoNotOptimize(r.ast);
64+
}
65+
}
66+
BENCHMARK(BM_Set_Simple);
67+
68+
static void BM_Set_Names(benchmark::State& state) {
69+
Parser<Dialect::MySQL> parser;
70+
const char* sql = "SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci";
71+
size_t len = strlen(sql);
72+
for (auto _ : state) {
73+
auto r = parser.parse(sql, len);
74+
benchmark::DoNotOptimize(r.ast);
75+
}
76+
}
77+
BENCHMARK(BM_Set_Names);
78+
79+
static void BM_Set_MultiVar(benchmark::State& state) {
80+
Parser<Dialect::MySQL> parser;
81+
const char* sql = "SET autocommit = 1, wait_timeout = 28800, sql_mode = 'STRICT_TRANS_TABLES'";
82+
size_t len = strlen(sql);
83+
for (auto _ : state) {
84+
auto r = parser.parse(sql, len);
85+
benchmark::DoNotOptimize(r.ast);
86+
}
87+
}
88+
BENCHMARK(BM_Set_MultiVar);
89+
90+
static void BM_Set_FunctionRHS(benchmark::State& state) {
91+
Parser<Dialect::MySQL> parser;
92+
const char* sql = "SET sql_mode = CONCAT(@@sql_mode, ',STRICT_TRANS_TABLES')";
93+
size_t len = strlen(sql);
94+
for (auto _ : state) {
95+
auto r = parser.parse(sql, len);
96+
benchmark::DoNotOptimize(r.ast);
97+
}
98+
}
99+
BENCHMARK(BM_Set_FunctionRHS);
100+
101+
// ========== Tier 1: SELECT parse ==========
102+
// Target: <500ns simple, <2us complex
103+
104+
static void BM_Select_Simple(benchmark::State& state) {
105+
Parser<Dialect::MySQL> parser;
106+
const char* sql = "SELECT col FROM t WHERE id = 1";
107+
size_t len = strlen(sql);
108+
for (auto _ : state) {
109+
auto r = parser.parse(sql, len);
110+
benchmark::DoNotOptimize(r.ast);
111+
}
112+
}
113+
BENCHMARK(BM_Select_Simple);
114+
115+
static void BM_Select_MultiColumn(benchmark::State& state) {
116+
Parser<Dialect::MySQL> parser;
117+
const char* sql = "SELECT id, name, email, status FROM users WHERE active = 1 ORDER BY name LIMIT 100";
118+
size_t len = strlen(sql);
119+
for (auto _ : state) {
120+
auto r = parser.parse(sql, len);
121+
benchmark::DoNotOptimize(r.ast);
122+
}
123+
}
124+
BENCHMARK(BM_Select_MultiColumn);
125+
126+
static void BM_Select_Join(benchmark::State& state) {
127+
Parser<Dialect::MySQL> parser;
128+
const char* sql = "SELECT u.id, o.total FROM users u JOIN orders o ON u.id = o.user_id WHERE o.status = 'active'";
129+
size_t len = strlen(sql);
130+
for (auto _ : state) {
131+
auto r = parser.parse(sql, len);
132+
benchmark::DoNotOptimize(r.ast);
133+
}
134+
}
135+
BENCHMARK(BM_Select_Join);
136+
137+
static void BM_Select_Complex(benchmark::State& state) {
138+
Parser<Dialect::MySQL> parser;
139+
const char* sql =
140+
"SELECT u.id, u.name, COUNT(o.id) AS order_count "
141+
"FROM users u "
142+
"LEFT JOIN orders o ON u.id = o.user_id "
143+
"WHERE u.status = 'active' AND u.created_at > '2024-01-01' "
144+
"GROUP BY u.id, u.name "
145+
"HAVING COUNT(o.id) > 5 "
146+
"ORDER BY order_count DESC "
147+
"LIMIT 50 OFFSET 10";
148+
size_t len = strlen(sql);
149+
for (auto _ : state) {
150+
auto r = parser.parse(sql, len);
151+
benchmark::DoNotOptimize(r.ast);
152+
}
153+
}
154+
BENCHMARK(BM_Select_Complex);
155+
156+
static void BM_Select_MultiJoin(benchmark::State& state) {
157+
Parser<Dialect::MySQL> parser;
158+
const char* sql =
159+
"SELECT a.id, b.name, c.value, d.total "
160+
"FROM t1 a "
161+
"JOIN t2 b ON a.id = b.a_id "
162+
"LEFT JOIN t3 c ON b.id = c.b_id "
163+
"JOIN t4 d ON c.id = d.c_id "
164+
"WHERE a.status = 1 AND d.total > 100 "
165+
"ORDER BY d.total DESC "
166+
"LIMIT 20";
167+
size_t len = strlen(sql);
168+
for (auto _ : state) {
169+
auto r = parser.parse(sql, len);
170+
benchmark::DoNotOptimize(r.ast);
171+
}
172+
}
173+
BENCHMARK(BM_Select_MultiJoin);
174+
175+
// ========== Query Reconstruction (round-trip) ==========
176+
// Target: <500ns
177+
178+
static void BM_Emit_SetSimple(benchmark::State& state) {
179+
Parser<Dialect::MySQL> parser;
180+
const char* sql = "SET autocommit = 1";
181+
size_t len = strlen(sql);
182+
for (auto _ : state) {
183+
auto r = parser.parse(sql, len);
184+
Emitter<Dialect::MySQL> emitter(parser.arena());
185+
emitter.emit(r.ast);
186+
benchmark::DoNotOptimize(emitter.result());
187+
}
188+
}
189+
BENCHMARK(BM_Emit_SetSimple);
190+
191+
static void BM_Emit_SelectSimple(benchmark::State& state) {
192+
Parser<Dialect::MySQL> parser;
193+
const char* sql = "SELECT * FROM users WHERE id = 1";
194+
size_t len = strlen(sql);
195+
for (auto _ : state) {
196+
auto r = parser.parse(sql, len);
197+
Emitter<Dialect::MySQL> emitter(parser.arena());
198+
emitter.emit(r.ast);
199+
benchmark::DoNotOptimize(emitter.result());
200+
}
201+
}
202+
BENCHMARK(BM_Emit_SelectSimple);
203+
204+
// ========== Arena reset ==========
205+
// Target: <10ns
206+
207+
static void BM_ArenaReset(benchmark::State& state) {
208+
Arena arena(65536);
209+
for (auto _ : state) {
210+
arena.allocate(256); // allocate something
211+
arena.reset();
212+
benchmark::DoNotOptimize(arena.bytes_used());
213+
}
214+
}
215+
BENCHMARK(BM_ArenaReset);
216+
217+
// ========== PostgreSQL ==========
218+
219+
static void BM_PgSQL_Select_Simple(benchmark::State& state) {
220+
Parser<Dialect::PostgreSQL> parser;
221+
const char* sql = "SELECT col FROM t WHERE id = 1";
222+
size_t len = strlen(sql);
223+
for (auto _ : state) {
224+
auto r = parser.parse(sql, len);
225+
benchmark::DoNotOptimize(r.ast);
226+
}
227+
}
228+
BENCHMARK(BM_PgSQL_Select_Simple);
229+
230+
static void BM_PgSQL_Set_Simple(benchmark::State& state) {
231+
Parser<Dialect::PostgreSQL> parser;
232+
const char* sql = "SET work_mem = '256MB'";
233+
size_t len = strlen(sql);
234+
for (auto _ : state) {
235+
auto r = parser.parse(sql, len);
236+
benchmark::DoNotOptimize(r.ast);
237+
}
238+
}
239+
BENCHMARK(BM_PgSQL_Set_Simple);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
Language: Cpp
3+
BasedOnStyle: Google
4+
PointerAlignment: Left
5+
...

third_party/benchmark/.clang-tidy

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
Checks: 'clang-analyzer-*,readability-redundant-*,performance-*'
3+
WarningsAsErrors: 'clang-analyzer-*,readability-redundant-*,performance-*'
4+
HeaderFilterRegex: '.*'
5+
FormatStyle: none
6+
User: user
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
---
2+
name: Bug report
3+
about: Create a report to help us improve
4+
title: "[BUG]"
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Describe the bug**
11+
A clear and concise description of what the bug is.
12+
13+
**System**
14+
Which OS, compiler, and compiler version are you using:
15+
- OS:
16+
- Compiler and version:
17+
18+
**To reproduce**
19+
Steps to reproduce the behavior:
20+
1. sync to commit ...
21+
2. cmake/bazel...
22+
3. make ...
23+
4. See error
24+
25+
**Expected behavior**
26+
A clear and concise description of what you expected to happen.
27+
28+
**Screenshots**
29+
If applicable, add screenshots to help explain your problem.
30+
31+
**Additional context**
32+
Add any other context about the problem here.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
name: Feature request
3+
about: Suggest an idea for this project
4+
title: "[FR]"
5+
labels: ''
6+
assignees: ''
7+
8+
---
9+
10+
**Is your feature request related to a problem? Please describe.**
11+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12+
13+
**Describe the solution you'd like**
14+
A clear and concise description of what you want to happen.
15+
16+
**Describe alternatives you've considered**
17+
A clear and concise description of any alternative solutions or features you've considered.
18+
19+
**Additional context**
20+
Add any other context or screenshots about the feature request here.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Downloads the Bazel 7.1.1 Linux binary into $HOME/bin when `bazel version`
# does not run successfully; otherwise exits 0 without doing anything.
if ! bazel version; then
  arch=$(uname -m)
  # The download URL below uses "arm64" where uname reports "aarch64".
  # "=" (not "==") keeps the test valid in POSIX sh; this file has no shebang.
  if [ "$arch" = "aarch64" ]; then
    arch="arm64"
  fi
  echo "Downloading $arch Bazel binary from GitHub releases."
  # Paths are quoted so a $HOME containing spaces is not word-split.
  curl -L -o "$HOME/bin/bazel" --create-dirs "https://github.com/bazelbuild/bazel/releases/download/7.1.1/bazel-7.1.1-linux-$arch"
  chmod +x "$HOME/bin/bazel"
else
  # Bazel is installed for the correct architecture
  exit 0
fi
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env bash
#
# Clones LLVM 16.0.6 and builds libc++, libc++abi and libunwind from its
# runtimes tree. Reads CC, CXX and LIBCXX_SANITIZER from the environment;
# BUILD_32_BITS defaults to OFF when unset or empty.

set -e

# Checkout LLVM sources
git clone --depth=1 --branch llvmorg-16.0.6 https://github.com/llvm/llvm-project.git llvm-project

## Setup libc++ options
if [ -z "$BUILD_32_BITS" ]; then
  export BUILD_32_BITS=OFF && echo disabling 32 bit build
fi

## Build libc++ (the install prefix is set to /usr, but no install step is run here).
## NOTE(review): this step used to be described as "Use unstable ABI for better
## sanitizer coverage", yet LIBCXX_ABI_UNSTABLE is passed as OFF below — confirm
## which of the two is intended.
mkdir llvm-build && cd llvm-build
# Expansions are quoted so values containing spaces stay a single cmake argument.
cmake -DCMAKE_C_COMPILER="${CC}" \
      -DCMAKE_CXX_COMPILER="${CXX}" \
      -DCMAKE_BUILD_TYPE=RelWithDebInfo \
      -DCMAKE_INSTALL_PREFIX=/usr \
      -DLIBCXX_ABI_UNSTABLE=OFF \
      -DLLVM_USE_SANITIZER="${LIBCXX_SANITIZER}" \
      -DLLVM_BUILD_32_BITS="${BUILD_32_BITS}" \
      -DLLVM_ENABLE_RUNTIMES='libcxx;libcxxabi;libunwind' \
      -G "Unix Makefiles" \
      ../llvm-project/runtimes/
make -j cxx cxxabi unwind
cd ..

0 commit comments

Comments
 (0)