|
| 1 | +// bench_engine.cpp — Engine execution benchmarks |
| 2 | +// |
| 3 | +// Benchmarks the query engine pipeline: expression evaluation, plan building, |
| 4 | +// full pipeline execution, filter/join/sort/aggregate operators. |
| 5 | + |
#include <benchmark/benchmark.h>

#include "sql_parser/parser.h"
#include "sql_parser/common.h"
#include "sql_engine/session.h"
#include "sql_engine/in_memory_catalog.h"
#include "sql_engine/data_source.h"
#include "sql_engine/local_txn.h"
#include "sql_engine/expression_eval.h"
#include "sql_engine/function_registry.h"
#include "sql_engine/plan_builder.h"
#include "sql_engine/plan_executor.h"
#include "sql_engine/optimizer.h"
#include "sql_engine/catalog_resolver.h"
#include "sql_engine/value.h"
#include "sql_engine/row.h"

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <utility>
#include <vector>
| 26 | + |
| 27 | +using namespace sql_parser; |
| 28 | +using namespace sql_engine; |
| 29 | + |
| 30 | +// ========== Helpers ========== |
| 31 | + |
| 32 | +// Build a catalog with a "users" table: id INT, name VARCHAR, age INT, score DOUBLE |
| 33 | +static InMemoryCatalog make_users_catalog() { |
| 34 | + InMemoryCatalog catalog; |
| 35 | + catalog.add_table("", "users", { |
| 36 | + {"id", SqlType::make_int(), false}, |
| 37 | + {"name", SqlType::make_varchar(255), true}, |
| 38 | + {"age", SqlType::make_int(), true}, |
| 39 | + {"score", SqlType{SqlType::DOUBLE, 0, 0, false, false}, true}, |
| 40 | + }); |
| 41 | + return catalog; |
| 42 | +} |
| 43 | + |
| 44 | +// Build a catalog with "orders" table: id INT, user_id INT, total DOUBLE |
| 45 | +static void add_orders_table(InMemoryCatalog& catalog) { |
| 46 | + catalog.add_table("", "orders", { |
| 47 | + {"id", SqlType::make_int(), false}, |
| 48 | + {"user_id", SqlType::make_int(), true}, |
| 49 | + {"total", SqlType{SqlType::DOUBLE, 0, 0, false, false}, true}, |
| 50 | + }); |
| 51 | +} |
| 52 | + |
| 53 | +// Build N rows for users table |
| 54 | +static std::vector<Row> make_user_rows(Arena& arena, int n) { |
| 55 | + std::vector<Row> rows; |
| 56 | + rows.reserve(n); |
| 57 | + for (int i = 0; i < n; ++i) { |
| 58 | + Row r = make_row(arena, 4); |
| 59 | + r.set(0, value_int(static_cast<int64_t>(i + 1))); |
| 60 | + |
| 61 | + char buf[32]; |
| 62 | + int len = snprintf(buf, sizeof(buf), "user_%d", i); |
| 63 | + char* name = static_cast<char*>(arena.allocate(static_cast<uint32_t>(len))); |
| 64 | + std::memcpy(name, buf, static_cast<size_t>(len)); |
| 65 | + r.set(1, value_string(StringRef{name, static_cast<uint32_t>(len)})); |
| 66 | + |
| 67 | + r.set(2, value_int(static_cast<int64_t>(18 + (i % 50)))); |
| 68 | + r.set(3, value_double(static_cast<double>(50 + (i % 100)))); |
| 69 | + rows.push_back(r); |
| 70 | + } |
| 71 | + return rows; |
| 72 | +} |
| 73 | + |
| 74 | +// Build N rows for orders table |
| 75 | +static std::vector<Row> make_order_rows(Arena& arena, int n, int max_user_id) { |
| 76 | + std::vector<Row> rows; |
| 77 | + rows.reserve(n); |
| 78 | + for (int i = 0; i < n; ++i) { |
| 79 | + Row r = make_row(arena, 3); |
| 80 | + r.set(0, value_int(static_cast<int64_t>(i + 1))); |
| 81 | + r.set(1, value_int(static_cast<int64_t>((i % max_user_id) + 1))); |
| 82 | + r.set(2, value_double(static_cast<double>(10 + (i % 200)))); |
| 83 | + rows.push_back(r); |
| 84 | + } |
| 85 | + return rows; |
| 86 | +} |
| 87 | + |
| 88 | +// ========== Expression Evaluation Benchmarks ========== |
| 89 | + |
| 90 | +// Simple: 1 + 2 |
| 91 | +static void BM_Expr_SimpleArith(benchmark::State& state) { |
| 92 | + Parser<Dialect::MySQL> parser; |
| 93 | + FunctionRegistry<Dialect::MySQL> funcs; |
| 94 | + funcs.register_builtins(); |
| 95 | + Arena arena{65536, 1048576}; |
| 96 | + |
| 97 | + const char* sql = "SELECT 1 + 2"; |
| 98 | + size_t len = std::strlen(sql); |
| 99 | + |
| 100 | + auto pr = parser.parse(sql, len); |
| 101 | + // The expression is in the SELECT list: pr.ast->first_child |
| 102 | + AstNode* expr = pr.ast ? pr.ast->first_child : nullptr; |
| 103 | + |
| 104 | + auto resolve = [](const StringRef&) -> Value { |
| 105 | + return value_null(); |
| 106 | + }; |
| 107 | + |
| 108 | + for (auto _ : state) { |
| 109 | + Value v = evaluate_expression<Dialect::MySQL>(expr, resolve, funcs, arena); |
| 110 | + benchmark::DoNotOptimize(v); |
| 111 | + } |
| 112 | +} |
| 113 | +BENCHMARK(BM_Expr_SimpleArith); |
| 114 | + |
| 115 | +// Complex: price * qty > 100 |
| 116 | +static void BM_Expr_ComplexCompare(benchmark::State& state) { |
| 117 | + InMemoryCatalog catalog; |
| 118 | + catalog.add_table("", "items", { |
| 119 | + {"price", SqlType{SqlType::DOUBLE, 0, 0, false, false}, false}, |
| 120 | + {"qty", SqlType::make_int(), false}, |
| 121 | + }); |
| 122 | + |
| 123 | + Parser<Dialect::MySQL> parser; |
| 124 | + FunctionRegistry<Dialect::MySQL> funcs; |
| 125 | + funcs.register_builtins(); |
| 126 | + Arena arena{65536, 1048576}; |
| 127 | + |
| 128 | + // Parse: SELECT price * qty > 100 FROM items |
| 129 | + const char* sql = "SELECT price * qty > 100 FROM items"; |
| 130 | + size_t len = std::strlen(sql); |
| 131 | + auto pr = parser.parse(sql, len); |
| 132 | + AstNode* expr = pr.ast ? pr.ast->first_child : nullptr; |
| 133 | + |
| 134 | + // Build a row with price=42.5, qty=3 |
| 135 | + Row row = make_row(arena, 2); |
| 136 | + row.set(0, value_double(42.5)); |
| 137 | + row.set(1, value_int(3)); |
| 138 | + |
| 139 | + const TableInfo* ti = catalog.get_table(StringRef{"items", 5}); |
| 140 | + auto resolve = make_resolver(catalog, ti, row.values); |
| 141 | + |
| 142 | + for (auto _ : state) { |
| 143 | + Value v = evaluate_expression<Dialect::MySQL>(expr, resolve, funcs, arena); |
| 144 | + benchmark::DoNotOptimize(v); |
| 145 | + } |
| 146 | +} |
| 147 | +BENCHMARK(BM_Expr_ComplexCompare); |
| 148 | + |
| 149 | +// ========== Plan Building Benchmarks ========== |
| 150 | + |
| 151 | +// Simple SELECT plan build |
| 152 | +static void BM_PlanBuild_Simple(benchmark::State& state) { |
| 153 | + InMemoryCatalog catalog = make_users_catalog(); |
| 154 | + Parser<Dialect::MySQL> parser; |
| 155 | + |
| 156 | + const char* sql = "SELECT id, name FROM users WHERE age > 21"; |
| 157 | + size_t len = std::strlen(sql); |
| 158 | + |
| 159 | + for (auto _ : state) { |
| 160 | + parser.reset(); |
| 161 | + auto pr = parser.parse(sql, len); |
| 162 | + PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena()); |
| 163 | + PlanNode* plan = builder.build(pr.ast); |
| 164 | + benchmark::DoNotOptimize(plan); |
| 165 | + } |
| 166 | +} |
| 167 | +BENCHMARK(BM_PlanBuild_Simple); |
| 168 | + |
| 169 | +// Complex SELECT plan build (JOIN + GROUP BY + HAVING + ORDER BY + LIMIT) |
| 170 | +static void BM_PlanBuild_Complex(benchmark::State& state) { |
| 171 | + InMemoryCatalog catalog = make_users_catalog(); |
| 172 | + add_orders_table(catalog); |
| 173 | + Parser<Dialect::MySQL> parser; |
| 174 | + |
| 175 | + const char* sql = |
| 176 | + "SELECT u.id, u.name, COUNT(o.id) AS cnt FROM users u " |
| 177 | + "JOIN orders o ON u.id = o.user_id " |
| 178 | + "WHERE u.age > 18 GROUP BY u.id, u.name " |
| 179 | + "HAVING COUNT(o.id) > 2 ORDER BY cnt DESC LIMIT 10"; |
| 180 | + size_t len = std::strlen(sql); |
| 181 | + |
| 182 | + for (auto _ : state) { |
| 183 | + parser.reset(); |
| 184 | + auto pr = parser.parse(sql, len); |
| 185 | + PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena()); |
| 186 | + PlanNode* plan = builder.build(pr.ast); |
| 187 | + benchmark::DoNotOptimize(plan); |
| 188 | + } |
| 189 | +} |
| 190 | +BENCHMARK(BM_PlanBuild_Complex); |
| 191 | + |
| 192 | +// ========== Full Pipeline Benchmarks ========== |
| 193 | + |
| 194 | +// Parse -> plan -> optimize -> execute (simple SELECT, 100 rows) |
| 195 | +static void BM_Pipeline_Simple(benchmark::State& state) { |
| 196 | + InMemoryCatalog catalog = make_users_catalog(); |
| 197 | + Arena data_arena{65536, 1048576}; |
| 198 | + auto rows = make_user_rows(data_arena, 100); |
| 199 | + const TableInfo* ti = catalog.get_table(StringRef{"users", 5}); |
| 200 | + InMemoryDataSource source(ti, std::move(rows)); |
| 201 | + |
| 202 | + FunctionRegistry<Dialect::MySQL> funcs; |
| 203 | + funcs.register_builtins(); |
| 204 | + |
| 205 | + const char* sql = "SELECT id, name FROM users WHERE age > 30"; |
| 206 | + size_t len = std::strlen(sql); |
| 207 | + |
| 208 | + for (auto _ : state) { |
| 209 | + Parser<Dialect::MySQL> parser; |
| 210 | + auto pr = parser.parse(sql, len); |
| 211 | + PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena()); |
| 212 | + PlanNode* plan = builder.build(pr.ast); |
| 213 | + Optimizer<Dialect::MySQL> optimizer(catalog, funcs); |
| 214 | + plan = optimizer.optimize(plan, parser.arena()); |
| 215 | + PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena()); |
| 216 | + executor.add_data_source("users", &source); |
| 217 | + ResultSet rs = executor.execute(plan); |
| 218 | + benchmark::DoNotOptimize(rs.rows.size()); |
| 219 | + } |
| 220 | +} |
| 221 | +BENCHMARK(BM_Pipeline_Simple); |
| 222 | + |
| 223 | +// ========== Operator Benchmarks ========== |
| 224 | + |
| 225 | +// Filter: scan 1000 rows, filter to ~100 |
| 226 | +static void BM_Op_Filter1000(benchmark::State& state) { |
| 227 | + InMemoryCatalog catalog = make_users_catalog(); |
| 228 | + Arena data_arena{262144, 1048576}; |
| 229 | + auto rows = make_user_rows(data_arena, 1000); |
| 230 | + const TableInfo* ti = catalog.get_table(StringRef{"users", 5}); |
| 231 | + InMemoryDataSource source(ti, std::move(rows)); |
| 232 | + |
| 233 | + FunctionRegistry<Dialect::MySQL> funcs; |
| 234 | + funcs.register_builtins(); |
| 235 | + |
| 236 | + // age > 60 filters to ~7/50 * 1000 = ~140 rows |
| 237 | + const char* sql = "SELECT id, name FROM users WHERE age > 60"; |
| 238 | + size_t len = std::strlen(sql); |
| 239 | + |
| 240 | + for (auto _ : state) { |
| 241 | + Parser<Dialect::MySQL> parser; |
| 242 | + auto pr = parser.parse(sql, len); |
| 243 | + PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena()); |
| 244 | + PlanNode* plan = builder.build(pr.ast); |
| 245 | + PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena()); |
| 246 | + executor.add_data_source("users", &source); |
| 247 | + ResultSet rs = executor.execute(plan); |
| 248 | + benchmark::DoNotOptimize(rs.rows.size()); |
| 249 | + } |
| 250 | +} |
| 251 | +BENCHMARK(BM_Op_Filter1000); |
| 252 | + |
| 253 | +// Join: two 1000-row tables via nested-loop |
| 254 | +static void BM_Op_Join1000(benchmark::State& state) { |
| 255 | + InMemoryCatalog catalog = make_users_catalog(); |
| 256 | + add_orders_table(catalog); |
| 257 | + Arena data_arena{262144, 4194304}; |
| 258 | + auto user_rows = make_user_rows(data_arena, 100); |
| 259 | + auto order_rows = make_order_rows(data_arena, 1000, 100); |
| 260 | + |
| 261 | + const TableInfo* uti = catalog.get_table(StringRef{"users", 5}); |
| 262 | + const TableInfo* oti = catalog.get_table(StringRef{"orders", 6}); |
| 263 | + InMemoryDataSource user_src(uti, std::move(user_rows)); |
| 264 | + InMemoryDataSource order_src(oti, std::move(order_rows)); |
| 265 | + |
| 266 | + FunctionRegistry<Dialect::MySQL> funcs; |
| 267 | + funcs.register_builtins(); |
| 268 | + |
| 269 | + const char* sql = |
| 270 | + "SELECT u.id, o.total FROM users u " |
| 271 | + "JOIN orders o ON u.id = o.user_id"; |
| 272 | + size_t len = std::strlen(sql); |
| 273 | + |
| 274 | + for (auto _ : state) { |
| 275 | + Parser<Dialect::MySQL> parser; |
| 276 | + auto pr = parser.parse(sql, len); |
| 277 | + PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena()); |
| 278 | + PlanNode* plan = builder.build(pr.ast); |
| 279 | + PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena()); |
| 280 | + executor.add_data_source("users", &user_src); |
| 281 | + executor.add_data_source("orders", &order_src); |
| 282 | + ResultSet rs = executor.execute(plan); |
| 283 | + benchmark::DoNotOptimize(rs.rows.size()); |
| 284 | + } |
| 285 | +} |
| 286 | +BENCHMARK(BM_Op_Join1000); |
| 287 | + |
| 288 | +// Sort: 1000 rows, 1 key |
| 289 | +static void BM_Op_Sort1000(benchmark::State& state) { |
| 290 | + InMemoryCatalog catalog = make_users_catalog(); |
| 291 | + Arena data_arena{262144, 1048576}; |
| 292 | + auto rows = make_user_rows(data_arena, 1000); |
| 293 | + const TableInfo* ti = catalog.get_table(StringRef{"users", 5}); |
| 294 | + InMemoryDataSource source(ti, std::move(rows)); |
| 295 | + |
| 296 | + FunctionRegistry<Dialect::MySQL> funcs; |
| 297 | + funcs.register_builtins(); |
| 298 | + |
| 299 | + const char* sql = "SELECT id, name, age FROM users ORDER BY age DESC"; |
| 300 | + size_t len = std::strlen(sql); |
| 301 | + |
| 302 | + for (auto _ : state) { |
| 303 | + Parser<Dialect::MySQL> parser; |
| 304 | + auto pr = parser.parse(sql, len); |
| 305 | + PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena()); |
| 306 | + PlanNode* plan = builder.build(pr.ast); |
| 307 | + PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena()); |
| 308 | + executor.add_data_source("users", &source); |
| 309 | + ResultSet rs = executor.execute(plan); |
| 310 | + benchmark::DoNotOptimize(rs.rows.size()); |
| 311 | + } |
| 312 | +} |
| 313 | +BENCHMARK(BM_Op_Sort1000); |
| 314 | + |
| 315 | +// Aggregate: 1000 rows, ~10 groups (GROUP BY age % 10 effectively) |
| 316 | +static void BM_Op_Aggregate1000(benchmark::State& state) { |
| 317 | + InMemoryCatalog catalog = make_users_catalog(); |
| 318 | + Arena data_arena{262144, 1048576}; |
| 319 | + auto rows = make_user_rows(data_arena, 1000); |
| 320 | + const TableInfo* ti = catalog.get_table(StringRef{"users", 5}); |
| 321 | + InMemoryDataSource source(ti, std::move(rows)); |
| 322 | + |
| 323 | + FunctionRegistry<Dialect::MySQL> funcs; |
| 324 | + funcs.register_builtins(); |
| 325 | + |
| 326 | + // age has 50 distinct values (18..67), so GROUP BY age gives ~50 groups |
| 327 | + const char* sql = |
| 328 | + "SELECT age, COUNT(id), AVG(score) FROM users GROUP BY age"; |
| 329 | + size_t len = std::strlen(sql); |
| 330 | + |
| 331 | + for (auto _ : state) { |
| 332 | + Parser<Dialect::MySQL> parser; |
| 333 | + auto pr = parser.parse(sql, len); |
| 334 | + PlanBuilder<Dialect::MySQL> builder(catalog, parser.arena()); |
| 335 | + PlanNode* plan = builder.build(pr.ast); |
| 336 | + PlanExecutor<Dialect::MySQL> executor(funcs, catalog, parser.arena()); |
| 337 | + executor.add_data_source("users", &source); |
| 338 | + ResultSet rs = executor.execute(plan); |
| 339 | + benchmark::DoNotOptimize(rs.rows.size()); |
| 340 | + } |
| 341 | +} |
| 342 | +BENCHMARK(BM_Op_Aggregate1000); |
0 commit comments