-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
MDEV-39681 delayed BF aborting with multi master load #5319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: 10.11
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,39 @@ | ||
| connection node_2; | ||
| connection node_1; | ||
| connection node_1; | ||
| CREATE TABLE t_a (id INT PRIMARY KEY, c INT) ENGINE=InnoDB; | ||
| CREATE TABLE t_b (id INT PRIMARY KEY, c INT) ENGINE=InnoDB; | ||
| INSERT INTO t_a VALUES (1, 0); | ||
| INSERT INTO t_b VALUES (1, 0); | ||
| connect node_1_holder, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
| BEGIN; | ||
| SELECT * FROM t_b WHERE id = 1 FOR UPDATE; | ||
| id c | ||
| 1 0 | ||
| connect node_1_victim, 127.0.0.1, root, , test, $NODE_MYPORT_1; | ||
| SET SESSION tx_isolation = 'READ-COMMITTED'; | ||
| BEGIN; | ||
| UPDATE t_a SET c = c + 1 WHERE id > 0; | ||
| connection node_1; | ||
| connection node_1_victim; | ||
| SET DEBUG_SYNC = 'row_search_before_rec_lock SIGNAL victim_at_lock WAIT_FOR victim_cont'; | ||
| SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL victim_at_release WAIT_FOR holder_done'; | ||
| UPDATE t_b SET c = c + 1 WHERE id > 0; | ||
| connection node_1; | ||
| SET DEBUG_SYNC = 'now WAIT_FOR victim_at_lock'; | ||
| connection node_2; | ||
| UPDATE t_a SET c = c + 100 WHERE id = 1; | ||
| connection node_1; | ||
| SET DEBUG_SYNC = 'now SIGNAL victim_cont'; | ||
| SET DEBUG_SYNC = 'now WAIT_FOR victim_at_release'; | ||
| connection node_1_holder; | ||
| COMMIT; | ||
| connection node_1; | ||
| SET DEBUG_SYNC = 'now SIGNAL holder_done'; | ||
| connection node_1_victim; | ||
| ERROR 40001: Deadlock found when trying to get lock; try restarting transaction | ||
| connection node_1; | ||
| SET DEBUG_SYNC = 'RESET'; | ||
| DROP TABLE t_a, t_b; | ||
| disconnect node_1_holder; | ||
| disconnect node_1_victim; | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| --source include/galera_cluster.inc | ||
| --source include/have_innodb.inc | ||
| --source include/have_debug_sync.inc | ||
|
|
||
| # | ||
| # A local transaction is BF-aborted (wsrep victim flag set) while it holds a | ||
| # granted lock and is about to enqueue a NEW waiting lock for another row. | ||
| # | ||
| # Pre-fix, the victim parked an orphaned LOCK_WAIT request (with wait_thr still | ||
| # set) that survived into rollback; another transaction granting it later | ||
| # invoked lock_wait_end() on the victim from a different thread, corrupting its | ||
| # error_state and tripping the lock_wait_end() / que_thr_step() / | ||
| # rollback_finish() assertions. With the fix, lock_rec_enqueue_waiting() | ||
| # returns DB_DEADLOCK immediately for an already-chosen victim, so no orphaned | ||
| # waiter is created. | ||
| # | ||
| # Two separate tables (t_a, t_b) are used on purpose: the victim is paused | ||
| # holding t_b's page latch, while the applier conflicts on t_a -- so the BF | ||
| # abort cannot deadlock on a page latch the victim holds. | ||
| # | ||
| # Requires a debug build: the regression manifests as an assertion failure. | ||
| # | ||
|
|
||
| --connection node_1 | ||
| CREATE TABLE t_a (id INT PRIMARY KEY, c INT) ENGINE=InnoDB; | ||
| CREATE TABLE t_b (id INT PRIMARY KEY, c INT) ENGINE=InnoDB; | ||
| INSERT INTO t_a VALUES (1, 0); | ||
| INSERT INTO t_b VALUES (1, 0); | ||
|
|
||
| # holder holds the lock that the victim will need on t_b | ||
| --connect node_1_holder, 127.0.0.1, root, , test, $NODE_MYPORT_1 | ||
| BEGIN; | ||
| SELECT * FROM t_b WHERE id = 1 FOR UPDATE; | ||
|
|
||
| # Victim: holds a granted lock on t_a (makes it a BF target), then locks t_b | ||
| --connect node_1_victim, 127.0.0.1, root, , test, $NODE_MYPORT_1 | ||
| SET SESSION tx_isolation = 'READ-COMMITTED'; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| BEGIN; | ||
| UPDATE t_a SET c = c + 1 WHERE id > 0; | ||
|
|
||
| --connection node_1 | ||
| --let $bf_before = `SELECT variable_value FROM information_schema.global_status WHERE variable_name = 'wsrep_local_bf_aborts'` | ||
|
|
||
| # Pause the victim just before it requests the row lock on t_b: past the | ||
| # interrupt check at the top of rec_loop, before it enqueues a waiting lock, | ||
| # and holding no lock_sys/trx mutex (only t_b's page latch). | ||
| --connection node_1_victim | ||
| # Freeze the victim at two points: | ||
| # (1) just before it requests the t_b row lock -- so it can be flagged a BF | ||
| # victim while it still holds no wait_lock, and | ||
| # (2) once it reaches release_locks() during its rollback finalization, while | ||
| # its state is already TRX_STATE_COMMITTED_IN_MEMORY and (pre-fix) it still | ||
| # owns the orphaned LOCK_WAIT request on t_b. | ||
| SET DEBUG_SYNC = 'row_search_before_rec_lock SIGNAL victim_at_lock WAIT_FOR victim_cont'; | ||
| SET DEBUG_SYNC = 'trx_t_release_locks_enter SIGNAL victim_at_release WAIT_FOR holder_done'; | ||
| # WHERE id > 0 forces a non-unique clustered-index scan, i.e. the | ||
| # semi-consistent read path (lock_trx_handle_wait), where a victim's waiting | ||
| # lock is NOT auto-cancelled the way the plain lock_wait() path cancels it. | ||
| --send UPDATE t_b SET c = c + 1 WHERE id > 0 | ||
|
|
||
| --connection node_1 | ||
| SET DEBUG_SYNC = 'now WAIT_FOR victim_at_lock'; | ||
|
|
||
| # node_2 writeset conflicts with the victim's granted t_a lock -> BF-aborts it. | ||
| # At this moment the victim has no wait_lock, so cancel_lock_wait_for_wsrep_bf_abort() | ||
| # only sets the wsrep victim flag (bit 2) and cancels nothing. | ||
| --connection node_2 | ||
| UPDATE t_a SET c = c + 100 WHERE id = 1; | ||
|
|
||
| # Wait until the victim has actually been flagged as a BF victim | ||
| --connection node_1 | ||
| --let $wait_condition = SELECT variable_value > $bf_before FROM information_schema.global_status WHERE variable_name = 'wsrep_local_bf_aborts' | ||
| --source include/wait_condition.inc | ||
|
|
||
| # Release the victim: it enqueues the t_b lock while already a victim, fails with | ||
| # deadlock and rolls back. Pre-fix it leaves an orphaned LOCK_WAIT request and | ||
| # parks in release_locks() (state COMMITTED_IN_MEMORY); the fix makes the | ||
| # enqueue return DB_DEADLOCK so no orphan is ever created. | ||
| SET DEBUG_SYNC = 'now SIGNAL victim_cont'; | ||
| SET DEBUG_SYNC = 'now WAIT_FOR victim_at_release'; | ||
|
|
||
| # Grant the (pre-fix) orphaned waiter by releasing holder's lock while the victim is | ||
| # parked in commit-in-memory: this invokes lock_wait_end() on the committed | ||
| # victim from H's thread -> assertion failure without the fix. With the fix | ||
| # there is no orphan, so the commit is clean. | ||
| --connection node_1_holder | ||
| COMMIT; | ||
|
|
||
| # Let the victim finish rolling back | ||
| --connection node_1 | ||
| SET DEBUG_SYNC = 'now SIGNAL holder_done'; | ||
|
|
||
| --connection node_1_victim | ||
| --error ER_LOCK_DEADLOCK | ||
| --reap | ||
|
|
||
| # Server is alive and consistent | ||
| --connection node_1 | ||
| SET DEBUG_SYNC = 'RESET'; | ||
| DROP TABLE t_a, t_b; | ||
|
|
||
| --disconnect node_1_holder | ||
| --disconnect node_1_victim | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1499,6 +1499,14 @@ lock_rec_enqueue_waiting( | |
| lock, that's why we check only deadlock victim bit here. */ | ||
| ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1)); | ||
|
|
||
| /* If this transaction has already been chosen as a victim — it must not | ||
| enqueue a new waiting lock request. | ||
| Return DB_DEADLOCK so that the caller rolls back instead of waiting. */ | ||
|
Comment on lines
+1502
to
+1504
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| if (trx->lock.was_chosen_as_deadlock_victim) { | ||
| trx->error_state = DB_DEADLOCK; | ||
| return DB_DEADLOCK; | ||
| } | ||
|
|
||
| if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) { | ||
| trx->error_state = DB_LOCK_WAIT_TIMEOUT; | ||
| return DB_LOCK_WAIT_TIMEOUT; | ||
|
|
@@ -3825,10 +3833,6 @@ lock_table_enqueue_waiting( | |
| ut_ad(trx->mutex_is_owner()); | ||
| ut_ad(!trx->dict_operation_lock_mode); | ||
|
|
||
| /* Enqueue the lock request that will wait to be granted */ | ||
| lock_table_create(table, mode | LOCK_WAIT, trx, c_lock); | ||
|
|
||
| trx->lock.wait_thr = thr; | ||
| /* Apart from Galera, only transactions that have waiting lock | ||
| may be chosen as deadlock victims. Only one lock can be waited for at a | ||
| time, and a transaction is associated with a single thread. That is why | ||
|
|
@@ -3838,6 +3842,19 @@ lock_table_enqueue_waiting( | |
| lock, that's why we check only deadlock victim bit here. */ | ||
| ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1)); | ||
|
|
||
| /* A transaction already chosen as a victim must not enqueue a new | ||
| waiting lock request. | ||
| Return DB_DEADLOCK so the caller rolls the transaction back. */ | ||
| if (trx->lock.was_chosen_as_deadlock_victim) { | ||
| trx->error_state = DB_DEADLOCK; | ||
| return DB_DEADLOCK; | ||
| } | ||
|
|
||
| /* Enqueue the lock request that will wait to be granted */ | ||
| lock_table_create(table, mode | LOCK_WAIT, trx, c_lock); | ||
|
|
||
| trx->lock.wait_thr = thr; | ||
|
|
||
| MONITOR_INC(MONITOR_TABLELOCK_WAIT); | ||
| return(DB_LOCK_WAIT); | ||
| } | ||
|
|
@@ -6702,6 +6719,28 @@ while holding a clustered index leaf page latch. | |
| dberr_t lock_trx_handle_wait(trx_t *trx) | ||
| { | ||
| DEBUG_SYNC_C("lock_trx_handle_wait_enter"); | ||
| #ifdef WITH_WSREP | ||
| /* A wsrep BF-abort marks the victim out-of-band: unlike native deadlock | ||
| resolution it does not cancel the victim's waiting lock atomically with | ||
| setting the flag, so the victim can reach this point still owning a live | ||
| LOCK_WAIT request (with trx->lock.wait_thr set). | ||
| So for BF-abort victm, we cancel the pending wait | ||
| here, which also clears trx->lock.wait_thr. But, this must not be done for | ||
| the native deadlock case, which cancels lock in Deadlock::report() */ | ||
|
Comment on lines
+6723
to
+6729
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are a few typos and spacing issues in this comment:
/* A wsrep BF-abort marks the victim out-of-band: unlike native deadlock
resolution it does not cancel the victim's waiting lock atomically with
setting the flag, so the victim can reach this point still owning a live
LOCK_WAIT request (with trx->lock.wait_thr set).
So for BF-abort victim, we cancel the pending wait
here, which also clears trx->lock.wait_thr. But, this must not be done for
the native deadlock case, which cancels lock in Deadlock::report() */ |
||
| if (trx->lock.was_chosen_as_deadlock_victim && trx->is_wsrep()) | ||
| { | ||
| DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check"); | ||
| if (trx->lock.wait_lock) | ||
| { | ||
| mysql_mutex_lock(&lock_sys.wait_mutex); | ||
| if (lock_t *wait_lock= trx->lock.wait_lock) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Following the project's coding standards, there should be spaces around the assignment operator if (lock_t *wait_lock = trx->lock.wait_lock)References
|
||
| lock_sys.cancel<true>(trx, wait_lock); | ||
| lock_sys.deadlock_check(); | ||
| mysql_mutex_unlock(&lock_sys.wait_mutex); | ||
| } | ||
| return DB_DEADLOCK; | ||
| } | ||
| #endif /* WITH_WSREP */ | ||
| if (trx->lock.was_chosen_as_deadlock_victim) | ||
| return DB_DEADLOCK; | ||
| DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check"); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The system variable
tx_isolationis deprecated. It is recommended to usetransaction_isolationinstead. Also, there are extra spaces before the=operator.