Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions mysql-test/suite/galera/r/galera_bf_abort_orphan_lock.result
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
connection node_2;
connection node_1;
connection node_1;
CREATE TABLE t_a (id INT PRIMARY KEY, c INT) ENGINE=InnoDB;
CREATE TABLE t_b (id INT PRIMARY KEY, c INT) ENGINE=InnoDB;
INSERT INTO t_a VALUES (1, 0);
INSERT INTO t_b VALUES (1, 0);
connect node_1_holder, 127.0.0.1, root, , test, $NODE_MYPORT_1;
BEGIN;
SELECT * FROM t_b WHERE id = 1 FOR UPDATE;
id c
1 0
connect node_1_victim, 127.0.0.1, root, , test, $NODE_MYPORT_1;
SET SESSION tx_isolation = 'READ-COMMITTED';

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The system variable tx_isolation is deprecated. It is recommended to use transaction_isolation instead. Also, there are extra spaces before the = operator.

SET SESSION transaction_isolation = 'READ-COMMITTED';

BEGIN;
UPDATE t_a SET c = c + 1 WHERE id > 0;
connection node_1;
connection node_1_victim;
SET DEBUG_SYNC = 'row_search_before_rec_lock SIGNAL victim_at_lock WAIT_FOR victim_cont';
SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL victim_at_release WAIT_FOR holder_done';
UPDATE t_b SET c = c + 1 WHERE id > 0;
connection node_1;
SET DEBUG_SYNC = 'now WAIT_FOR victim_at_lock';
connection node_2;
UPDATE t_a SET c = c + 100 WHERE id = 1;
connection node_1;
SET DEBUG_SYNC = 'now SIGNAL victim_cont';
SET DEBUG_SYNC = 'now WAIT_FOR victim_at_release';
connection node_1_holder;
COMMIT;
connection node_1;
SET DEBUG_SYNC = 'now SIGNAL holder_done';
connection node_1_victim;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
connection node_1;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t_a, t_b;
disconnect node_1_holder;
disconnect node_1_victim;
103 changes: 103 additions & 0 deletions mysql-test/suite/galera/t/galera_bf_abort_orphan_lock.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug_sync.inc

#
# A local transaction is BF-aborted (wsrep victim flag set) while it holds a
# granted lock and is about to enqueue a NEW waiting lock for another row.
#
# Pre-fix, the victim parked an orphaned LOCK_WAIT request (with wait_thr still
# set) that survived into rollback; another transaction granting it later
# invoked lock_wait_end() on the victim from a different thread, corrupting its
# error_state and tripping the lock_wait_end() / que_thr_step() /
# rollback_finish() assertions. With the fix, lock_rec_enqueue_waiting()
# returns DB_DEADLOCK immediately for an already-chosen victim, so no orphaned
# waiter is created.
#
# Two separate tables (t_a, t_b) are used on purpose: the victim is paused
# holding t_b's page latch, while the applier conflicts on t_a -- so the BF
# abort cannot deadlock on a page latch the victim holds.
#
# Requires a debug build: the regression manifests as an assertion failure.
#

--connection node_1
CREATE TABLE t_a (id INT PRIMARY KEY, c INT) ENGINE=InnoDB;
CREATE TABLE t_b (id INT PRIMARY KEY, c INT) ENGINE=InnoDB;
INSERT INTO t_a VALUES (1, 0);
INSERT INTO t_b VALUES (1, 0);

# holder holds the lock that the victim will need on t_b
--connect node_1_holder, 127.0.0.1, root, , test, $NODE_MYPORT_1
BEGIN;
SELECT * FROM t_b WHERE id = 1 FOR UPDATE;

# Victim: holds a granted lock on t_a (makes it a BF target), then locks t_b
--connect node_1_victim, 127.0.0.1, root, , test, $NODE_MYPORT_1
SET SESSION tx_isolation = 'READ-COMMITTED';

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The system variable tx_isolation is deprecated. It is recommended to use transaction_isolation instead. Also, there are extra spaces before the = operator.

SET SESSION transaction_isolation = 'READ-COMMITTED';

BEGIN;
UPDATE t_a SET c = c + 1 WHERE id > 0;

--connection node_1
--let $bf_before = `SELECT variable_value FROM information_schema.global_status WHERE variable_name = 'wsrep_local_bf_aborts'`

# Pause the victim just before it requests the row lock on t_b: past the
# interrupt check at the top of rec_loop, before it enqueues a waiting lock,
# and holding no lock_sys/trx mutex (only t_b's page latch).
--connection node_1_victim
# Freeze the victim at two points:
# (1) just before it requests the t_b row lock -- so it can be flagged a BF
# victim while it still holds no wait_lock, and
# (2) once it reaches release_locks() during its rollback finalization, while
# its state is already TRX_STATE_COMMITTED_IN_MEMORY and (pre-fix) it still
# owns the orphaned LOCK_WAIT request on t_b.
SET DEBUG_SYNC = 'row_search_before_rec_lock SIGNAL victim_at_lock WAIT_FOR victim_cont';
SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL victim_at_release WAIT_FOR holder_done';
# WHERE id > 0 forces a non-unique clustered-index scan, i.e. the
# semi-consistent read path (lock_trx_handle_wait), where a victim's waiting
# lock is NOT auto-cancelled the way the plain lock_wait() path cancels it.
--send UPDATE t_b SET c = c + 1 WHERE id > 0

--connection node_1
SET DEBUG_SYNC = 'now WAIT_FOR victim_at_lock';

# node_2 writeset conflicts with the victim's granted t_a lock -> BF-aborts it.
# At this moment the victim has no wait_lock, so cancel_lock_wait_for_wsrep_bf_abort()
# only sets the wsrep victim flag (bit 2) and cancels nothing.
--connection node_2
UPDATE t_a SET c = c + 100 WHERE id = 1;

# Wait until the victim has actually been flagged as a BF victim
--connection node_1
--let $wait_condition = SELECT variable_value > $bf_before FROM information_schema.global_status WHERE variable_name = 'wsrep_local_bf_aborts'
--source include/wait_condition.inc

# Release the victim: it enqueues the t_b lock while already a victim, fails with
# deadlock and rolls back. Pre-fix it leaves an orphaned LOCK_WAIT request and
# parks in release_locks() (state COMMITTED_IN_MEMORY); the fix makes the
# enqueue return DB_DEADLOCK so no orphan is ever created.
SET DEBUG_SYNC = 'now SIGNAL victim_cont';
SET DEBUG_SYNC = 'now WAIT_FOR victim_at_release';

# Grant the (pre-fix) orphaned waiter by releasing holder's lock while the victim is
# parked in commit-in-memory: this invokes lock_wait_end() on the committed
# victim from H's thread -> assertion failure without the fix. With the fix
# there is no orphan, so the commit is clean.
--connection node_1_holder
COMMIT;

# Let the victim finish rolling back
--connection node_1
SET DEBUG_SYNC = 'now SIGNAL holder_done';

--connection node_1_victim
--error ER_LOCK_DEADLOCK
--reap

# Server is alive and consistent
--connection node_1
SET DEBUG_SYNC = 'RESET';
DROP TABLE t_a, t_b;

--disconnect node_1_holder
--disconnect node_1_victim
47 changes: 43 additions & 4 deletions storage/innobase/lock/lock0lock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,14 @@ lock_rec_enqueue_waiting(
lock, that's why we check only deadlock victim bit here. */
ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1));

/* If this transaction has already been chosen as a victim — it must not
enqueue a new waiting lock request.
Return DB_DEADLOCK so that the caller rolls back instead of waiting. */
Comment on lines +1502 to +1504

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There is a typo in the comment: rolls back has an extra space. It should be rolls back.

	/* If this transaction has already been chosen as a victim — it must not
	enqueue a new waiting lock request.
	Return DB_DEADLOCK so that the caller rolls back instead of waiting. */

if (trx->lock.was_chosen_as_deadlock_victim) {
trx->error_state = DB_DEADLOCK;
return DB_DEADLOCK;
}

if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) {
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
return DB_LOCK_WAIT_TIMEOUT;
Expand Down Expand Up @@ -3825,10 +3833,6 @@ lock_table_enqueue_waiting(
ut_ad(trx->mutex_is_owner());
ut_ad(!trx->dict_operation_lock_mode);

/* Enqueue the lock request that will wait to be granted */
lock_table_create(table, mode | LOCK_WAIT, trx, c_lock);

trx->lock.wait_thr = thr;
/* Apart from Galera, only transactions that have waiting lock
may be chosen as deadlock victims. Only one lock can be waited for at a
time, and a transaction is associated with a single thread. That is why
Expand All @@ -3838,6 +3842,19 @@ lock_table_enqueue_waiting(
lock, that's why we check only deadlock victim bit here. */
ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1));

/* A transaction already chosen as a victim must not enqueue a new
waiting lock request.
Return DB_DEADLOCK so the caller rolls the transaction back. */
if (trx->lock.was_chosen_as_deadlock_victim) {
trx->error_state = DB_DEADLOCK;
return DB_DEADLOCK;
}

/* Enqueue the lock request that will wait to be granted */
lock_table_create(table, mode | LOCK_WAIT, trx, c_lock);

trx->lock.wait_thr = thr;

MONITOR_INC(MONITOR_TABLELOCK_WAIT);
return(DB_LOCK_WAIT);
}
Expand Down Expand Up @@ -6702,6 +6719,28 @@ while holding a clustered index leaf page latch.
dberr_t lock_trx_handle_wait(trx_t *trx)
{
DEBUG_SYNC_C("lock_trx_handle_wait_enter");
#ifdef WITH_WSREP
/* A wsrep BF-abort marks the victim out-of-band: unlike native deadlock
resolution it does not cancel the victim's waiting lock atomically with
setting the flag, so the victim can reach this point still owning a live
LOCK_WAIT request (with trx->lock.wait_thr set).
So for BF-abort victm, we cancel the pending wait
here, which also clears trx->lock.wait_thr. But, this must not be done for
the native deadlock case, which cancels lock in Deadlock::report() */
Comment on lines +6723 to +6729

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

There are a few typos and spacing issues in this comment:

  • victm should be victim.
  • There are double spaces after commas/periods (e.g., victm, we and thr. But).
  • There are double spaces before the closing comment marker */.
  /* A wsrep BF-abort marks the victim out-of-band: unlike native deadlock
  resolution it does not cancel the victim's waiting lock atomically with
  setting the flag, so the victim can reach this point still owning a live
  LOCK_WAIT request (with trx->lock.wait_thr set). 
  So for BF-abort victim, we cancel the pending wait
  here, which also clears trx->lock.wait_thr. But, this must not be done for
  the native deadlock case, which cancels lock in Deadlock::report() */

if (trx->lock.was_chosen_as_deadlock_victim && trx->is_wsrep())
{
DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check");
if (trx->lock.wait_lock)
{
mysql_mutex_lock(&lock_sys.wait_mutex);
if (lock_t *wait_lock= trx->lock.wait_lock)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Following the project's coding standards, there should be spaces around the assignment operator =.

      if (lock_t *wait_lock = trx->lock.wait_lock)
References
  1. Follow the project's specific coding standards (e.g., CODING_STANDARDS.md) for code formatting and spacing, such as spacing around assignment operators, rather than generic style guides like the Google C++ Style Guide. (link)

lock_sys.cancel<true>(trx, wait_lock);
lock_sys.deadlock_check();
mysql_mutex_unlock(&lock_sys.wait_mutex);
}
return DB_DEADLOCK;
}
#endif /* WITH_WSREP */
if (trx->lock.was_chosen_as_deadlock_victim)
return DB_DEADLOCK;
DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check");
Expand Down
7 changes: 7 additions & 0 deletions storage/innobase/row/row0sel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5241,6 +5241,13 @@ row_search_mvcc(
lock_type = LOCK_ORDINARY;
}

/* Test hook (debug builds only): allows a BF abort to mark this
transaction a wsrep victim after the interrupt check at the top of
rec_loop but before it enqueues a waiting lock request below — the
window in which the orphaned-waiter bug forms. Inert in release
builds (DEBUG_SYNC_C compiles to nothing without ENABLED_DEBUG_SYNC). */
DEBUG_SYNC_C("row_search_before_rec_lock");

err = sel_set_rec_lock(pcur,
rec, index, offsets,
prebuilt->select_lock_type,
Expand Down