ProxySQL
diff --git a/‎Makefile‎
Lines changed: 1 addition & 0 deletions b/‎Makefile‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎include/sql_engine/connection_pool.h‎
Lines changed: 25 additions & 2 deletions b/‎include/sql_engine/connection_pool.h‎
Lines changed: 25 additions & 2 deletions
diff --git a/‎include/sql_engine/distributed_txn.h‎
Lines changed: 153 additions & 0 deletions b/‎include/sql_engine/distributed_txn.h‎
Lines changed: 153 additions & 0 deletions
@@ -81,6 +81,7 @@ TEST_SRCS = $(TEST_DIR)/test_main.cpp \
             $(TEST_DIR)/test_window.cpp \
             $(TEST_DIR)/test_cte.cpp \
             $(TEST_DIR)/test_datetime_format.cpp \
+            $(TEST_DIR)/test_datetime_funcs.cpp \
             $(TEST_DIR)/test_result_set.cpp
 TEST_OBJS = $(TEST_SRCS:.cpp=.o)
 TEST_TARGET = $(PROJECT_ROOT)/run_tests
 
@@ -19,6 +19,25 @@
 
 namespace sql_engine {
 
+// Default timeouts (seconds) for connecting to, reading from, and writing to
+// a backend. They bound how long a single libmysqlclient call can block:
+// without them, a wedged backend or a network partition during XA PREPARE
+// would hang the entire 2PC coordinator indefinitely. The read/write values
+// are generous (30s) so that healthy OLTP queries never hit them.
+//
+// Each macro is guarded by #ifndef, so a build can override it with -D.
+// The values are applied per connection through mysql_options() before
+// mysql_real_connect(), so every new pooled connection gets the same defaults.
+#ifndef SQL_ENGINE_MYSQL_CONNECT_TIMEOUT_SEC
+#define SQL_ENGINE_MYSQL_CONNECT_TIMEOUT_SEC 5
+#endif
+#ifndef SQL_ENGINE_MYSQL_READ_TIMEOUT_SEC
+#define SQL_ENGINE_MYSQL_READ_TIMEOUT_SEC 30
+#endif
+#ifndef SQL_ENGINE_MYSQL_WRITE_TIMEOUT_SEC
+#define SQL_ENGINE_MYSQL_WRITE_TIMEOUT_SEC 30
+#endif
+
 class ConnectionPool {
 public:
     ConnectionPool() = default;
@@ -86,8 +105,12 @@ class ConnectionPool {
         MYSQL* c = mysql_init(nullptr);
         if (!c) throw std::runtime_error("mysql_init failed for " + cfg.name);
 
-        unsigned int timeout = 5;
-        mysql_options(c, MYSQL_OPT_CONNECT_TIMEOUT, &timeout);
+        unsigned int connect_timeout = SQL_ENGINE_MYSQL_CONNECT_TIMEOUT_SEC;
+        unsigned int read_timeout    = SQL_ENGINE_MYSQL_READ_TIMEOUT_SEC;
+        unsigned int write_timeout   = SQL_ENGINE_MYSQL_WRITE_TIMEOUT_SEC;
+        mysql_options(c, MYSQL_OPT_CONNECT_TIMEOUT, &connect_timeout);
+        mysql_options(c, MYSQL_OPT_READ_TIMEOUT,    &read_timeout);
+        mysql_options(c, MYSQL_OPT_WRITE_TIMEOUT,   &write_timeout);
 
         if (!mysql_real_connect(c, cfg.host.c_str(), cfg.user.c_str(),
                                 cfg.password.c_str(), cfg.database.c_str(),
 
@@ -4,6 +4,7 @@
 #include "sql_engine/transaction_manager.h"
 #include "sql_engine/remote_executor.h"
 #include "sql_engine/shard_map.h"
+#include "sql_engine/durable_txn_log.h"
 #include "sql_parser/common.h"
 
 #include <string>
@@ -37,6 +38,44 @@ class DistributedTransactionManager : public TransactionManager {
                                    BackendDialect dialect = BackendDialect::MYSQL)
         : executor_(executor), dialect_(dialect) {}
 
+    // Attach a durable write-ahead log (WAL) for 2PC recovery. Optional but
+    // strongly recommended for any real workload: without it, a crash
+    // between phase 1 and phase 2 leaves prepared transactions on every
+    // backend with no automatic recovery path.
+    //
+    // Only the pointer is stored, so the log must outlive this manager.
+    // Pass nullptr (the default) to disable logging, as before this change.
+    void set_durable_log(DurableTransactionLog* log) { txn_log_ = log; }
+
+    // Require the WAL write to succeed before phase 2 starts. Default: false.
+    // When true, if log_decision() fails, commit() skips phase 2 and returns
+    // false; participants are NOT rolled back, so prepared transactions stay
+    // on the backends until an operator resolves them. When false, the failure
+    // is reported on stderr and phase 2 proceeds. rollback() ignores this flag.
+    void set_require_durable_log(bool required) { require_durable_log_ = required; }
+
+    // Set a per-phase statement timeout (in milliseconds). When > 0, the
+    // manager issues SET SESSION max_execution_time (MySQL) or
+    // SET LOCAL statement_timeout (PostgreSQL) on each participant BEFORE
+    // phase 1 (XA PREPARE / PREPARE TRANSACTION) and again before phase 2
+    // (XA COMMIT / COMMIT PREPARED). This is independent of the backend
+    // connection's default read/write timeout.
+    //
+    // 0 (default) means "don't override": only the backend connection's
+    // read/write timeout applies.
+    //
+    // NOTE: this is best-effort. The SET is sent as its own statement and
+    // its result is ignored by the phase loops. Because
+    // ThreadSafeMultiRemoteExecutor may hand us a different pooled
+    // connection for each call, the SET may not land on the connection that
+    // runs the phase statement. On MySQL, max_execution_time only bounds
+    // SELECTs, so XA PREPARE / XA COMMIT still rely on the connection-level
+    // read/write timeout. Sending both statements in one multi-statement
+    // call would close the pooling gap; the MVP uses two separate calls.
+    void set_phase_statement_timeout_ms(uint32_t ms) {
+        phase_statement_timeout_ms_ = ms;
+    }
+
     bool begin() override {
         txn_id_ = generate_txn_id();
         participants_.clear();
@@ -77,20 +116,69 @@ class DistributedTransactionManager : public TransactionManager {
 
         // Phase 1: prepare all participants
         if (!phase1_prepare()) {
+            // Durably record the ROLLBACK decision BEFORE dispatching to
+            // participants. If we crash between here and the rollback
+            // completing, recovery replays the rollback.
+            if (!log_decision_or_fail(DurableTransactionLog::Decision::ROLLBACK)) {
+                // Caller asked us to require durable logging and it failed
+                // -- leave transactions prepared rather than lose the
+                // decision. A DBA will resolve them manually via
+                // XA RECOVER + XA ROLLBACK.
+                active_ = false;
+                return false;
+            }
             phase2_rollback();
+            // Rollback is best-effort; mark COMPLETE regardless. An in-doubt
+            // prepared transaction left after a partial rollback is
+            // recorded separately (if we wanted) -- currently we accept
+            // that rollback failure is a separate class of operator issue.
+            maybe_log_complete();
+            active_ = false;
+            return false;
+        }
+
+        // Durably record the COMMIT decision BEFORE phase 2 dispatches.
+        // This is the core durability invariant: a record of "commit this
+        // transaction" exists on disk before any participant has been
+        // told to commit, so a crash before, during, or after phase 2
+        // is recoverable by replaying the committed decision.
+        if (!log_decision_or_fail(DurableTransactionLog::Decision::COMMIT)) {
+            // The WAL is required and it failed. Roll back in-memory state
+            // so the caller sees the commit fail; prepared transactions
+            // remain on backends until DBA cleanup.
             active_ = false;
             return false;
         }
 
         // Phase 2: commit all participants
         bool ok = phase2_commit();
+        // Only mark COMPLETE if every participant committed successfully.
+        // A partial commit is a heuristic hazard: some participants hold
+        // the data committed, others may still be prepared or failed. The
+        // transaction remains in-doubt in the log so startup recovery (or
+        // a DBA) can finish the job.
+        if (ok) {
+            maybe_log_complete();
+        } else {
+            std::fprintf(stderr,
+                "[DistributedTransactionManager] phase 2 commit failed for "
+                "txn %s; leaving in-doubt in the WAL for recovery.\n",
+                txn_id_.c_str());
+        }
         active_ = false;
         return ok;
     }
 
     bool rollback() override {
         if (!active_) return false;
+        // Log ROLLBACK first; result ignored, so a failed WAL write never blocks it.
+        (void)log_decision_or_fail(DurableTransactionLog::Decision::ROLLBACK);
         phase2_rollback();
+        // Rollback is best-effort: the WAL entry is marked COMPLETE and true
+        // is returned whether or not every backend acknowledged. A failed
+        // rollback of a prepared transaction leaves that participant for a
+        // DBA to resolve via XA RECOVER.
+        maybe_log_complete();
         active_ = false;
         return true;
     }
@@ -118,6 +206,62 @@ class DistributedTransactionManager : public TransactionManager {
     bool active_ = false;
     bool auto_commit_ = true;
 
+    DurableTransactionLog* txn_log_ = nullptr;
+    bool require_durable_log_ = false;
+    uint32_t phase_statement_timeout_ms_ = 0;
+
+    // Best-effort: send a statement-timeout SET to `backend` before a phase
+    // statement. Returns true if no timeout is configured (nothing sent),
+    // else send_sql()'s result; the phase loops currently ignore it.
+    // NOTE(review): PostgreSQL's SET LOCAL has no effect outside a
+    // transaction block, so it likely cannot bound COMMIT PREPARED -- verify.
+    bool maybe_set_statement_timeout(const char* backend) {
+        if (phase_statement_timeout_ms_ == 0) return true;
+        std::string sql;
+        if (dialect_ == BackendDialect::MYSQL) {
+            // MySQL 5.7.8+ (5.7.4-5.7.7 named it max_statement_time):
+            // max_execution_time is in milliseconds and only bounds
+            // SELECTs. For DML and XA commands the client read_timeout is
+            // the real protection; this only covers SELECTs between phases.
+            sql = "SET SESSION max_execution_time = " +
+                  std::to_string(phase_statement_timeout_ms_);
+        } else {
+            sql = "SET LOCAL statement_timeout = " +
+                  std::to_string(phase_statement_timeout_ms_);
+        }
+        return send_sql(backend, sql);
+    }
+
+    // Write the phase-2 decision (with txn_id_ and participants_) to the
+    // durable log before dispatching. Returns true if the caller may proceed:
+    // - log not configured: true (log-less mode preserves legacy behavior)
+    // - log configured and write succeeded: true
+    // - log configured and write failed (reported on stderr either way):
+    //     - require_durable_log_: false (abort, don't risk a crash
+    //       window without a recoverable decision)
+    //     - !require_durable_log_: true (the caller proceeds at its own
+    //       risk, without a durable record of the decision)
+    bool log_decision_or_fail(DurableTransactionLog::Decision d) {
+        if (!txn_log_) return true;  // no WAL attached
+        if (txn_log_->log_decision(txn_id_, d, participants_)) return true;  // write succeeded
+        if (require_durable_log_) {
+            std::fprintf(stderr,
+                "[DistributedTransactionManager] WAL write failed for txn %s; "
+                "refusing to proceed with phase 2 because require_durable_log is set.\n",
+                txn_id_.c_str());
+            return false;
+        }
+        std::fprintf(stderr,
+            "[DistributedTransactionManager] WAL write failed for txn %s; "
+            "proceeding without durability (set require_durable_log to refuse instead).\n",
+            txn_id_.c_str());
+        return true;
+    }
+
+    void maybe_log_complete() {  // marks txn_id_ complete in the WAL; no-op when no log is attached
+        if (txn_log_) txn_log_->log_complete(txn_id_);  // any result from log_complete() is not checked
+    }
+
     // Generate a unique transaction ID.
     static std::string generate_txn_id() {
         auto now = std::chrono::steady_clock::now();
@@ -146,6 +290,13 @@ class DistributedTransactionManager : public TransactionManager {
     bool phase1_prepare() {
         bool all_ok = true;
         for (auto& p : participants_) {
+            // Best-effort per-phase timeout. See note on
+            // set_phase_statement_timeout_ms: on ThreadSafeMultiRemoteExecutor
+            // the SET may end up on a different pooled connection than
+            // the next statement, so this is advisory. The connection-level
+            // read/write timeout in connection_pool.h is the real ceiling.
+            maybe_set_statement_timeout(p.c_str());
+
             bool ok = false;
             if (dialect_ == BackendDialect::MYSQL) {
                 std::string end_sql = "XA END '" + txn_id_ + "'";
@@ -168,6 +319,8 @@ class DistributedTransactionManager : public TransactionManager {
     bool phase2_commit() {
         bool all_ok = true;
         for (auto& p : participants_) {
+            maybe_set_statement_timeout(p.c_str());
+
             bool ok = false;
             if (dialect_ == BackendDialect::MYSQL) {
                 std::string sql = "XA COMMIT '" + txn_id_ + "'";