From a461ea5adf8cfda5d6372e3acd7579f1222f2198 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Thu, 2 Apr 2026 21:22:57 +0300 Subject: [PATCH] MDEV-38243 Write binlog row events for changes done by cascading FK operations This commit implements a feature which changes the handling of cascading foreign key operations to write the changes of cascading operations into binlog. The applying of such transaction, in the slave node, will apply just the binlog events, and does not execute the actual foreign key cascade operation. This will simplify the slave side replication applying and make it more predictable in terms of potential interference with other parallel applying happning in the node. This feature can be turned ON/OFF by new variable: rpl_use_binlog_events_for_fk_cascade, with default value OFF The actual implementation is largely by windsurf. The commit has also mtr tests for testing rpl_use_binlog_events_for_fk_cascade feature: rpl.rpl_fk_cascade_binlog_row, rpl.rpl_fk_set_null_binlog_row and rpl.fk_cascade_binlog_row_rollback --- include/mysql/plugin.h | 1 + include/mysql/service_thd_binlog.h | 47 +++ include/mysql/service_wsrep.h | 2 + mysql-test/main/mysqld--help.result | 4 + .../rpl/r/rpl_fk_cascade_binlog_row.result | 85 +++++ .../rpl_fk_cascade_binlog_row_rollback.result | 140 ++++++++ .../rpl/r/rpl_fk_set_null_binlog_row.result | 61 ++++ .../rpl/t/rpl_fk_cascade_binlog_row.test | 133 ++++++++ .../t/rpl_fk_cascade_binlog_row_rollback.test | 201 ++++++++++++ .../rpl/t/rpl_fk_set_null_binlog_row.test | 105 ++++++ .../sys_vars/r/sysvars_server_embedded.result | 10 + .../r/sysvars_server_notembedded.result | 10 + sql/handler.cc | 68 +++- sql/handler.h | 13 +- sql/log.cc | 25 +- sql/log_event.h | 2 + sql/log_event_server.cc | 3 + sql/service_wsrep.cc | 5 + sql/sql_class.cc | 58 ++++ sql/sql_class.h | 6 + sql/sys_vars.cc | 20 ++ storage/innobase/handler/ha_innodb.cc | 80 ++++- storage/innobase/handler/ha_innodb.h | 18 ++ storage/innobase/include/row0sel.h | 12 +- storage/innobase/include/trx0trx.h | 15 + storage/innobase/row/row0ins.cc | 299 +++++++++++++++++- storage/innobase/row/row0sel.cc | 2 +- storage/innobase/trx/trx0trx.cc | 25 +- 28 files changed, 1434 insertions(+), 16 deletions(-) create mode 100644 include/mysql/service_thd_binlog.h create mode 100644 mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row.result create mode 100644 mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row_rollback.result create mode 100644 mysql-test/suite/rpl/r/rpl_fk_set_null_binlog_row.result create mode 100644 mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row.test create mode 100644 mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row_rollback.test create mode 100644 mysql-test/suite/rpl/t/rpl_fk_set_null_binlog_row.test diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 5c2a7311dd27e..a03dddb0dd195 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -697,6 +697,7 @@ int thd_in_lock_tables(const MYSQL_THD thd); int thd_tablespace_op(const MYSQL_THD thd); long long thd_test_options(const MYSQL_THD thd, long long test_options); int thd_sql_command(const MYSQL_THD thd); + struct DDL_options_st; struct DDL_options_st *thd_ddl_options(const MYSQL_THD thd); void thd_storage_lock_wait(MYSQL_THD thd, long long value); diff --git a/include/mysql/service_thd_binlog.h b/include/mysql/service_thd_binlog.h new file mode 100644 index 0000000000000..912f4213ee662 --- /dev/null +++ b/include/mysql/service_thd_binlog.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2026, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct TABLE; +class Event_log; +class binlog_cache_data; + +int thd_is_current_stmt_binlog_format_row(const MYSQL_THD thd); + +int thd_rpl_use_binlog_events_for_fk_cascade(const MYSQL_THD thd); + +void thd_binlog_mark_fk_cascade_events(MYSQL_THD thd); + +int thd_binlog_update_row(MYSQL_THD thd, struct TABLE *table, + class Event_log *bin_log, + class binlog_cache_data *cache_data, + int is_trans, unsigned long row_image, + const unsigned char *before_record, + const unsigned char *after_record); + +int thd_binlog_delete_row(MYSQL_THD thd, struct TABLE *table, + class Event_log *bin_log, + class binlog_cache_data *cache_data, + int is_trans, unsigned long row_image, + const unsigned char *before_record); + +#ifdef __cplusplus +} +#endif diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index 8c001ca147063..9d0114ac04f0e 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -153,6 +153,7 @@ extern struct wsrep_service_st { #define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I) #define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T) #define wsrep_get_domain_id(T) wsrep_service->wsrep_get_domain_id_func(T) +#define wsrep_emulate_binlog(T) wsrep_service->wsrep_emulate_binlog_func(T) #else #define MYSQL_SERVICE_WSREP_STATIC_INCLUDED @@ -265,5 +266,6 @@ extern "C" void wsrep_report_bf_lock_wait(const THD *thd, /* declare parallel applying unsafety for the THD */ extern "C" void wsrep_thd_set_PA_unsafe(MYSQL_THD thd); extern "C" uint32 wsrep_get_domain_id(); +extern "C" my_bool wsrep_emulate_binlog(const MYSQL_THD thd); #endif #endif /* MYSQL_SERVICE_WSREP_INCLUDED */ diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 623311c170616..04cf3a8be387d 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -1389,6 +1389,9 @@ The following specify which files/extra groups are read (specified before remain play when stop slave is executed --rpl-semi-sync-slave-trace-level=# The tracing level for semi-sync replication + --rpl-use-binlog-events-for-fk-cascade + If enabled, the master will write row events for foreign + key cascade operations --safe-mode Skip some optimize stages (for testing). Deprecated, will be removed in a future release. --safe-user-create Don't allow new user creation by the user who has no @@ -2096,6 +2099,7 @@ rpl-semi-sync-slave-delay-master FALSE rpl-semi-sync-slave-enabled FALSE rpl-semi-sync-slave-kill-conn-timeout 5 rpl-semi-sync-slave-trace-level 32 +rpl-use-binlog-events-for-fk-cascade FALSE safe-user-create FALSE secure-file-priv (No default value) secure-timestamp NO diff --git a/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row.result b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row.result new file mode 100644 index 0000000000000..5eb0ef025a8bf --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row.result @@ -0,0 +1,85 @@ +include/master-slave.inc +[connection master] +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE CASCADE +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +2 +connection master; +DELETE FROM p WHERE id=2; +connection slave; +connection slave; +SELECT COUNT(*) FROM p; +COUNT(*) +0 +SELECT COUNT(*) FROM c; +COUNT(*) +0 +connection master; +FLUSH BINARY LOGS; +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_binlog_row_off.sql +NOT FOUND /### DELETE FROM `test`.`c`/ in fk_cascade_binlog_row_off.sql +DROP TABLE c, p; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE CASCADE +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +2 +connection master; +DELETE FROM p WHERE id=2; +connection slave; +connection slave; +SELECT COUNT(*) FROM p; +COUNT(*) +0 +SELECT COUNT(*) FROM c; +COUNT(*) +0 +connection master; +FLUSH BINARY LOGS; +FOUND 1 /### UPDATE `test`.`c`/ in fk_cascade_binlog_row_on.sql +FOUND 1 /### DELETE FROM `test`.`c`/ in fk_cascade_binlog_row_on.sql +DROP TABLE c, p; +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row_rollback.result b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row_rollback.result new file mode 100644 index 0000000000000..25eee590001fa --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row_rollback.result @@ -0,0 +1,140 @@ +include/master-slave.inc +[connection master] +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE CASCADE +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +connection slave; +connection master; +# +# Phase 1: full transaction ROLLBACK +# +FLUSH BINARY LOGS; +BEGIN; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK; +# master: parent and child unchanged +SELECT * FROM p ORDER BY id; +id v +1 10 +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection slave; +connection slave; +# slave: child unchanged +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection master; +FLUSH BINARY LOGS; +# no cascade child row event must be present +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_rollback_full.sql +# +# Phase 2: ROLLBACK TO SAVEPOINT (surviving work before the savepoint) +# +FLUSH BINARY LOGS; +BEGIN; +INSERT INTO p VALUES (3, 30); +SAVEPOINT sp1; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK TO SAVEPOINT sp1; +COMMIT; +# master: cascade undone, INSERT before savepoint kept +SELECT * FROM p ORDER BY id; +id v +1 10 +3 30 +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection slave; +connection slave; +# slave: child unchanged, parent has surviving row +SELECT * FROM p ORDER BY id; +id v +1 10 +3 30 +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection master; +FLUSH BINARY LOGS; +# cascade child row event must be absent +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_rollback_sp.sql +# surviving INSERT before the savepoint must be replicated +FOUND 1 /### INSERT INTO `test`.`p`/ in fk_cascade_rollback_sp.sql +DROP TABLE c, p; +# +# Phase 3: statement rollback, transaction continues and commits +# +CREATE TABLE p ( +id INT PRIMARY KEY, +u INT, +UNIQUE KEY uq_u(u) +) ENGINE=InnoDB; +CREATE TABLE c ( +cid INT PRIMARY KEY, +pid INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 100), (2, 200); +INSERT INTO c VALUES (11, 1); +connection slave; +connection master; +FLUSH BINARY LOGS; +BEGIN; +UPDATE p SET id=id+10, u=100 WHERE id IN (1,2) ORDER BY id; +ERROR 23000: Duplicate entry '100' for key 'uq_u' +INSERT INTO p VALUES (9, 900); +COMMIT; +# master: failed UPDATE rolled back, INSERT committed +SELECT * FROM p ORDER BY id; +id u +1 100 +2 200 +9 900 +SELECT * FROM c ORDER BY cid; +cid pid +11 1 +connection slave; +connection slave; +# slave: child unchanged, parent has committed row +SELECT * FROM p ORDER BY id; +id u +1 100 +2 200 +9 900 +SELECT * FROM c ORDER BY cid; +cid pid +11 1 +connection master; +FLUSH BINARY LOGS; +# discarded cascade child row event must be absent +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_rollback_stmt.sql +# committed INSERT must be replicated +FOUND 1 /### INSERT INTO `test`.`p`/ in fk_cascade_rollback_stmt.sql +DROP TABLE c, p; +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_fk_set_null_binlog_row.result b/mysql-test/suite/rpl/r/rpl_fk_set_null_binlog_row.result new file mode 100644 index 0000000000000..43a6c65f91b69 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_fk_set_null_binlog_row.result @@ -0,0 +1,61 @@ +include/master-slave.inc +[connection master] +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT NULL, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE SET NULL +ON UPDATE SET NULL +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +NULL +connection master; +FLUSH BINARY LOGS; +NOT FOUND /### UPDATE `test`.`c`/ in fk_set_null_binlog_row_off.sql +DROP TABLE c, p; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT NULL, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE SET NULL +ON UPDATE SET NULL +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +NULL +connection master; +FLUSH BINARY LOGS; +FOUND 1 /### UPDATE `test`.`c`/ in fk_set_null_binlog_row_on.sql +DROP TABLE c, p; +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row.test b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row.test new file mode 100644 index 0000000000000..73d083a66dda5 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row.test @@ -0,0 +1,133 @@ +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_binlog_format_row.inc + +--source include/master-slave.inc + +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; + +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; + +# Phase A: feature OFF (no explicit FK cascade row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE CASCADE + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +connection slave; +SELECT pid FROM c ORDER BY id; + +connection master; +DELETE FROM p WHERE id=2; + +sync_slave_with_master; +connection slave; +SELECT COUNT(*) FROM p; +SELECT COUNT(*) FROM c; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--let SEARCH_PATTERN= ### DELETE FROM `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +# Phase B: feature ON (explicit FK cascade row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE CASCADE + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +connection slave; +SELECT pid FROM c ORDER BY id; + +connection master; +DELETE FROM p WHERE id=2; + +sync_slave_with_master; +connection slave; +SELECT COUNT(*) FROM p; +SELECT COUNT(*) FROM c; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql +--let SEARCH_ABORT= NOT FOUND|FOUND 0|FOUND [2-9] +--source include/search_pattern_in_file.inc + +--let SEARCH_PATTERN= ### DELETE FROM `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql +--let SEARCH_ABORT= NOT FOUND|FOUND 0|FOUND [2-9] +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; + +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row_rollback.test b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row_rollback.test new file mode 100644 index 0000000000000..b28060718ece0 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row_rollback.test @@ -0,0 +1,201 @@ +# +# MDEV-38243 +# +# When rpl_use_binlog_events_for_fk_cascade is enabled the master collects +# row events for FK cascade operations. If the row changes those events +# describe are rolled back, the events must be discarded (freed) and must not +# end up in the binary log. This test exercises the three rollback paths that +# discard the queued cascade events: +# +# 1. full transaction ROLLBACK (ha_rollback_trans, all=true) +# 2. ROLLBACK TO SAVEPOINT (ha_rollback_to_savepoint) +# 3. statement rollback, transaction kept (ha_rollback_trans, all=false) +# +# In every case the cascaded child row event (### UPDATE `test`.`c`) must be +# absent from the binary log, the master and slave data must stay consistent, +# and any surviving work in the same transaction must still replicate. +# +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_binlog_format_row.inc + +--source include/master-slave.inc + +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; + +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE CASCADE + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +sync_slave_with_master; +connection master; + +--echo # +--echo # Phase 1: full transaction ROLLBACK +--echo # +FLUSH BINARY LOGS; +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) + +BEGIN; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK; + +--echo # master: parent and child unchanged +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY id; + +sync_slave_with_master; +connection slave; +--echo # slave: child unchanged +SELECT * FROM c ORDER BY id; + +connection master; +FLUSH BINARY LOGS; +--let $datadir = `SELECT @@datadir` +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_full.sql + +--echo # no cascade child row event must be present +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_full.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--echo # +--echo # Phase 2: ROLLBACK TO SAVEPOINT (surviving work before the savepoint) +--echo # +FLUSH BINARY LOGS; +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) + +BEGIN; +INSERT INTO p VALUES (3, 30); +SAVEPOINT sp1; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK TO SAVEPOINT sp1; +COMMIT; + +--echo # master: cascade undone, INSERT before savepoint kept +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY id; + +sync_slave_with_master; +connection slave; +--echo # slave: child unchanged, parent has surviving row +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY id; + +connection master; +FLUSH BINARY LOGS; +--let $datadir = `SELECT @@datadir` +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql + +--echo # cascade child row event must be absent +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--echo # surviving INSERT before the savepoint must be replicated +--let SEARCH_PATTERN= ### INSERT INTO `test`.`p` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql +--let SEARCH_ABORT= NOT FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +--echo # +--echo # Phase 3: statement rollback, transaction continues and commits +--echo # +# The parent has a secondary UNIQUE key. A multi-row UPDATE cascades on the +# first row (queuing a child cascade event) and then hits a duplicate-key +# error on the second row, which rolls back the statement. The transaction +# stays open and commits later work. The discarded cascade event must not be +# flushed into the binary log at COMMIT. +CREATE TABLE p ( + id INT PRIMARY KEY, + u INT, + UNIQUE KEY uq_u(u) +) ENGINE=InnoDB; + +CREATE TABLE c ( + cid INT PRIMARY KEY, + pid INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 100), (2, 200); +INSERT INTO c VALUES (11, 1); + +sync_slave_with_master; +connection master; + +FLUSH BINARY LOGS; +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) + +BEGIN; +# row id=1: id 1->11 cascades c.pid 1->11 (event queued), u 100->100 (no change) +# row id=2: u 200->100 duplicates row 1's u -> ER_DUP_ENTRY -> statement rollback +--error ER_DUP_ENTRY +UPDATE p SET id=id+10, u=100 WHERE id IN (1,2) ORDER BY id; +INSERT INTO p VALUES (9, 900); +COMMIT; + +--echo # master: failed UPDATE rolled back, INSERT committed +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY cid; + +sync_slave_with_master; +connection slave; +--echo # slave: child unchanged, parent has committed row +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY cid; + +connection master; +FLUSH BINARY LOGS; +--let $datadir = `SELECT @@datadir` +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql + +--echo # discarded cascade child row event must be absent +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--echo # committed INSERT must be replicated +--let SEARCH_PATTERN= ### INSERT INTO `test`.`p` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql +--let SEARCH_ABORT= NOT FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; + +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_full.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_fk_set_null_binlog_row.test b/mysql-test/suite/rpl/t/rpl_fk_set_null_binlog_row.test new file mode 100644 index 0000000000000..42c7278e45ee3 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_fk_set_null_binlog_row.test @@ -0,0 +1,105 @@ +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_binlog_format_row.inc + +--source include/master-slave.inc + +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; + +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; + +# Phase A: feature OFF (no explicit FK SET NULL row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT NULL, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE SET NULL + ON UPDATE SET NULL +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +SELECT pid FROM c ORDER BY id; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_off.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_off.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +# Phase B: feature ON (explicit FK SET NULL row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT NULL, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE SET NULL + ON UPDATE SET NULL +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +SELECT pid FROM c ORDER BY id; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_on.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_on.sql +--let SEARCH_ABORT= NOT FOUND|FOUND 0|FOUND [2-9] +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; + +--remove_file $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_off.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_on.sql + +--source include/rpl_end.inc diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 481ca0b5f4030..3c7097a0a5245 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -3462,6 +3462,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME RPL_USE_BINLOG_EVENTS_FOR_FK_CASCADE +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT If enabled, the master will write row events for foreign key cascade operations +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME SECURE_FILE_PRIV VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 579dae7af5aab..eed0d7feb7f03 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -4032,6 +4032,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME RPL_USE_BINLOG_EVENTS_FOR_FK_CASCADE +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT If enabled, the master will write row events for foreign key cascade operations +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME SECURE_FILE_PRIV VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR diff --git a/sql/handler.cc b/sql/handler.cc index 12dc4240c63fb..bfb2807370691 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -118,6 +118,30 @@ static handlerton *installed_htons[128]; KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, false }; +static void flush_pending_cascade_binlog_for_thd(THD *thd) +{ + if (!thd || thd->rgi_slave) return; + + TABLE *table; + for (table = thd->open_tables; table; table = table->next) { + if (table->file) { + table->file->flush_pending_cascade_binlog(); + } + } +} + +static void discard_pending_cascade_binlog_for_thd(THD *thd) +{ + if (!thd || thd->rgi_slave) return; + + TABLE *table; + for (table = thd->open_tables; table; table = table->next) { + if (table->file) { + table->file->discard_pending_cascade_binlog(); + } + } +} + /* number of entries in handlertons[] */ ulong total_ha= 0; /* number of storage engines (from handlertons[]) that support 2pc */ @@ -2144,6 +2168,14 @@ int ha_commit_trans(THD *thd, bool all) thd->mdl_context.release_lock(thd->backup_commit_lock); thd->backup_commit_lock= 0; } + + if (!error) + { + if ((thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd)) && + (all || thd->in_active_multi_stmt_transaction())) + flush_pending_cascade_binlog_for_thd(thd); + } #ifdef WITH_WSREP if (wsrep_is_active(thd) && is_real_trans && !error && (rw_ha_count == 0 || all) && @@ -2337,12 +2369,11 @@ int ha_rollback_trans(THD *thd, bool all) rollback without signalling following transactions. And in release builds, we explicitly do the signalling before rolling back. */ - DBUG_ASSERT( - !(thd->rgi_slave && - !thd->rgi_slave->worker_error && - thd->rgi_slave->did_mark_start_commit) || - (thd->transaction->xid_state.is_explicit_XA() || - (thd->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_PREPARED_XA))); + if (!(thd->rgi_slave && thd->rgi_slave->worker_error)) + DBUG_ASSERT( + !(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit) || + (thd->transaction->xid_state.is_explicit_XA() || + (thd->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_PREPARED_XA))); if (thd->rgi_slave && !thd->rgi_slave->worker_error && @@ -2424,6 +2455,19 @@ int ha_rollback_trans(THD *thd, bool all) binlog_post_rollback(thd, all); } + /* + The engines have rolled back (the whole transaction when all==true, or the + current statement to its implicit savepoint when all==false). Any queued + FK-cascade row events describe row changes that were just undone, so + discard and free them rather than letting them be flushed into the binary + log by a later statement or at commit. For a full rollback the queue would + also be freed when the trx is released, but discarding here keeps the + statement-rollback case correct and the behaviour consistent. + */ + if (thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd)) + discard_pending_cascade_binlog_for_thd(thd); + #ifdef WITH_WSREP if (WSREP(thd) && thd->is_error()) { @@ -3344,6 +3388,18 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } + + /* + The cascade row events queued for this transaction describe row + changes that are being rolled back to the savepoint (or the queue is + empty because earlier statements already flushed their events into the + binlog cache, which the binlog savepoint machinery truncates + separately). Either way we must not write these events, so discard and + free them rather than flushing them into the binary log. + */ + if (thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd)) + discard_pending_cascade_binlog_for_thd(thd); #ifdef WITH_WSREP if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION) { diff --git a/sql/handler.h b/sql/handler.h index f0763aebe717c..cdb1d23c3c5dd 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -5327,7 +5327,18 @@ class handler :public Sql_alloc bool check_table_binlog_row_based(); bool prepare_for_row_logging(); int prepare_for_modify(bool can_set_fields, bool can_lookup); - int binlog_log_row(const uchar *before_record, const uchar *after_record, + + virtual void flush_pending_cascade_binlog() {} + + /* + Discard FK-cascade row events queued for this transaction, + when transaction rolls back. + */ + virtual void discard_pending_cascade_binlog() {} + + int prepare_for_insert(bool do_create); + int binlog_log_row(const uchar *before_record, + const uchar *after_record, Log_func *log_func); inline void clear_cached_table_binlog_row_based_flag() diff --git a/sql/log.cc b/sql/log.cc index 387795e30fce7..2c155fda778b7 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -8068,7 +8068,27 @@ int binlog_flush_pending_rows_event(THD *thd, bool stmt_end, */ if (stmt_end) { - pending->set_flags(Rows_log_event::STMT_END_F); + if (thd->binlog_fk_cascade_events && + !thd->rgi_slave && + (thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd))) + { + TABLE *table; + for (table= thd->open_tables; table; table= table->next) + { + if (table->file) + table->file->flush_pending_cascade_binlog(); + } + } + + /* + Flushing cascaded row events may have created a new pending event or + replaced the current one. Ensure we mark the final pending event as + statement end. + */ + pending= cache_data->pending(); + if (pending) + pending->set_flags(Rows_log_event::STMT_END_F); thd->reset_binlog_for_next_statement(); } @@ -8294,6 +8314,9 @@ Event_log::prepare_pending_rows_event(THD *thd, TABLE* table, if (unlikely(!ev)) DBUG_RETURN(NULL); ev->server_id= serv_id; // I don't like this, it's too easy to forget. + + if (thd->binlog_fk_cascade_events) + ev->set_flags(Rows_log_event::FK_CASCADE_EVENTS_F); /* flush the pending event and replace it with the newly created event... diff --git a/sql/log_event.h b/sql/log_event.h index 517b2e62fd06e..60b29732e8e0a 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -4789,6 +4789,8 @@ class Rows_log_event : public Log_event */ COMPLETE_ROWS_F = (1U << 3), + FK_CASCADE_EVENTS_F = (1U << 4), + /* Value of the OPTION_NO_CHECK_CONSTRAINT_CHECKS flag in thd->options */ NO_CHECK_CONSTRAINT_CHECKS_F = (1U << 7) }; diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 3c8d7bea47381..7a56b850d2350 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -5051,6 +5051,9 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) else thd->variables.option_bits&= ~OPTION_NO_CHECK_CONSTRAINT_CHECKS; + if (get_flags(FK_CASCADE_EVENTS_F)) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + /* A small test to verify that objects have consistent types */ DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 3f09e9d8b3e47..705d64921c372 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -441,3 +441,8 @@ extern "C" my_bool wsrep_thd_is_local_transaction(const THD *thd) return (wsrep_thd_is_local(thd) && thd->wsrep_cs().transaction().active()); } + +extern "C" my_bool wsrep_emulate_binlog(const THD *thd) +{ + return WSREP_EMULATE_BINLOG_NNULL(thd); +} diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 70c571f14e6ec..b26a1024896a1 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -93,6 +93,22 @@ extern "C" const uchar *get_var_key(const void *entry_, size_t *length, return reinterpret_cast(entry->name.str); } +void THD::binlog_mark_fk_cascade_events() +{ + binlog_fk_cascade_events= true; + + if (binlog_cache_mngr *cache_mngr= binlog_get_cache_mngr()) + { + if (Rows_log_event *pending= binlog_get_pending_rows_event( + cache_mngr, use_trans_cache(this, false))) + pending->set_flags(Rows_log_event::FK_CASCADE_EVENTS_F); + + if (Rows_log_event *pending= binlog_get_pending_rows_event( + cache_mngr, use_trans_cache(this, true))) + pending->set_flags(Rows_log_event::FK_CASCADE_EVENTS_F); + } +} + extern "C" void free_user_var(void *entry_) { user_var_entry *entry= static_cast(entry_); @@ -507,6 +523,47 @@ int thd_sql_command(const THD *thd) return (int) thd->lex->sql_command; } +extern "C" +int thd_is_current_stmt_binlog_format_row(const THD *thd) +{ + return (int) thd->is_current_stmt_binlog_format_row(); +} + +extern "C" +int thd_rpl_use_binlog_events_for_fk_cascade(const THD *thd) +{ + return (int) thd->variables.rpl_use_binlog_events_for_fk_cascade; +} + +extern "C" +void thd_binlog_mark_fk_cascade_events(THD *thd) +{ + thd->binlog_mark_fk_cascade_events(); +} + +extern "C" +int thd_binlog_update_row(THD *thd, TABLE *table, Event_log *bin_log, + binlog_cache_data *cache_data, int is_trans, + unsigned long row_image, + const unsigned char *before_record, + const unsigned char *after_record) +{ + return thd->binlog_update_row(table, bin_log, cache_data, (bool) is_trans, + (enum_binlog_row_image) row_image, + before_record, after_record); +} + +extern "C" +int thd_binlog_delete_row(THD *thd, TABLE *table, Event_log *bin_log, + binlog_cache_data *cache_data, int is_trans, + unsigned long row_image, + const unsigned char *before_record) +{ + return thd->binlog_delete_row(table, bin_log, cache_data, (bool) is_trans, + (enum_binlog_row_image) row_image, + before_record); +} + /* Returns options used with DDL's, like IF EXISTS etc... Will returns 'nonsense' if the command was not a DDL. @@ -894,6 +951,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) mysys_var=0; binlog_evt_union.do_union= FALSE; binlog_table_maps= FALSE; + binlog_fk_cascade_events= FALSE; binlog_xid= 0; enable_slow_log= 0; durability_property= HA_REGULAR_DURABILITY; diff --git a/sql/sql_class.h b/sql/sql_class.h index aad3defa963be..4e7b9e0e12bec 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -916,6 +916,7 @@ typedef struct system_variables my_bool sql_log_bin; my_bool binlog_annotate_row_events; my_bool binlog_direct_non_trans_update; + my_bool rpl_use_binlog_events_for_fk_cascade; my_bool column_compression_zlib_wrap; my_bool sysdate_is_now; my_bool wsrep_on; @@ -3850,6 +3851,10 @@ class THD: public THD_count, /* this must be first */ /* 1 if binlog table maps has been written */ bool binlog_table_maps; + bool binlog_fk_cascade_events; + + void binlog_mark_fk_cascade_events(); + void issue_unsafe_warnings(); void reset_unsafe_warnings() { binlog_unsafe_warning_flags= 0; } @@ -3857,6 +3862,7 @@ class THD: public THD_count, /* this must be first */ void reset_binlog_for_next_statement() { binlog_table_maps= 0; + binlog_fk_cascade_events= false; } bool binlog_table_should_be_logged(const LEX_CSTRING *db); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index e6e0579dba7b0..8f0751b37bd4b 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -781,6 +781,26 @@ Sys_binlog_create_tmptable_format( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(binlog_create_tmp_format_check)); +static bool rpl_use_binlog_events_for_fk_cascade_check(sys_var *self, THD *thd, set_var *var) +{ + if (var->type == OPT_GLOBAL) + return false; + + if (unlikely(error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_SQL_LOG_BIN, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SQL_LOG_BIN))) + return true; + + return false; +} + +static Sys_var_mybool Sys_rpl_use_binlog_events_for_fk_cascade( + "rpl_use_binlog_events_for_fk_cascade", + "If enabled, the master will write row events for foreign key cascade operations", + SESSION_VAR(rpl_use_binlog_events_for_fk_cascade), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(rpl_use_binlog_events_for_fk_cascade_check)); + static bool deprecated_explicit_defaults_for_timestamp(sys_var *self, THD *thd, set_var *var) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c276db592dac0..4978631193f82 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -49,8 +49,10 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include #include -#include +#include +#include #include +#include #include #include "sql_type_geom.h" #include "scope.h" @@ -8873,6 +8875,82 @@ ha_innobase::update_row( DBUG_RETURN(err); } +void +ha_innobase::flush_pending_cascade_binlog() +{ + trx_t* trx = thd_to_trx(m_user_thd); + const bool emulate_binlog= +#ifdef WITH_WSREP + wsrep_emulate_binlog(m_user_thd); +#else + false; +#endif + + if (!thd_rpl_use_binlog_events_for_fk_cascade(m_user_thd) && + !emulate_binlog) { + return; + } + + if (trx == NULL || trx->pending_cascade_binlog_row_events.empty()) { + return; + } + + m_user_thd->binlog_mark_fk_cascade_events(); + + for (auto& ev : trx->pending_cascade_binlog_row_events) { + if (ev.table == NULL || ev.table->file == NULL) { + if (ev.before_record) { + my_free(ev.before_record); + } + if (ev.after_record) { + my_free(ev.after_record); + } + continue; + } + + if (ev.table->s->tmp_table != NO_TMP_TABLE) { + if (ev.before_record) { + my_free(ev.before_record); + } + if (ev.after_record) { + my_free(ev.after_record); + } + continue; + } + + MY_BITMAP* old_read_set = ev.table->read_set; + MY_BITMAP* old_write_set = ev.table->write_set; + MY_BITMAP* old_rpl_write_set = ev.table->rpl_write_set; + + ev.table->column_bitmaps_set_no_signal( + &ev.table->s->all_set, &ev.table->s->all_set); + if (ev.table->rpl_write_set == NULL) { + ev.table->rpl_write_set = &ev.table->s->all_set; + } + + Log_func* log_func = reinterpret_cast(ev.log_func); + ev.table->file->binlog_log_row(ev.before_record, + ev.after_record, + log_func); + + ev.table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + ev.table->rpl_write_set = old_rpl_write_set; + + my_free(ev.before_record); + my_free(ev.after_record); + } + + trx->pending_cascade_binlog_row_events.clear(); +} + +void +ha_innobase::discard_pending_cascade_binlog() +{ + if (trx_t* trx = thd_to_trx(m_user_thd)) { + trx->free_cascade_binlog_row_events(); + } +} + /**********************************************************************//** Deletes a row given as the parameter. @return error number or 0 */ diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 93c442b2e72ea..6d8076210d3d4 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -121,6 +121,10 @@ class ha_innobase final : public handler int update_row(const uchar * old_data, const uchar * new_data) override; + void flush_pending_cascade_binlog() override; + + void discard_pending_cascade_binlog() override; + int delete_row(const uchar * buf) override; bool was_semi_consistent_read() override; @@ -515,6 +519,20 @@ class ha_innobase final : public handler /** Save CPU time with prebuilt/cached data structures */ row_prebuilt_t* m_prebuilt; +public: + row_prebuilt_t* innobase_prebuilt() const { return m_prebuilt; } + + void rebuild_template_for_cascade_binlog_row_image() + { + reset_template(); + build_template(true); + } + + void reset_template_for_cascade_binlog_row_image() + { + reset_template(); + } + /** Thread handle of the user currently using the handler; this is set in external_lock function */ THD* m_user_thd; diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h index 35e3cbe66315c..243333eed8f92 100644 --- a/storage/innobase/include/row0sel.h +++ b/storage/innobase/include/row0sel.h @@ -27,7 +27,7 @@ Created 12/19/1997 Heikki Tuuri #pragma once #include "data0data.h" -#include "que0types.h" +#include "dict0dict.h" #include "trx0types.h" #include "read0types.h" #include "row0types.h" @@ -36,6 +36,16 @@ Created 12/19/1997 Heikki Tuuri #include "btr0pcur.h" #include "row0mysql.h" +MY_ATTRIBUTE((warn_unused_result)) +bool row_sel_store_mysql_rec( + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, + const dtuple_t* vrow, + bool rec_clust, + const dict_index_t* index, + const rec_offs* offsets); + /*********************************************************************//** Creates a select node struct. @return own: select node struct */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index bef9311b81b32..5dc88011f9cbe 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -44,6 +44,14 @@ Created 3/26/1996 Heikki Tuuri struct mtr_t; struct rw_trx_hash_element_t; class ha_handler_stats; +struct TABLE; + +struct trx_cascade_binlog_row_event { + TABLE* table; + unsigned char* before_record; + unsigned char* after_record; + void* log_func; +}; /******************************************************************//** Set detailed error message for the transaction. */ @@ -962,6 +970,13 @@ struct trx_t : ilist_node<> transaction branch */ trx_mod_tables_t mod_tables; /*!< List of tables that were modified by this transaction */ + + std::vector pending_cascade_binlog_row_events; + + /** Free the events of any queued FK-cascade binlog row events and empty + the list. Used both to discard events whose row changes are being rolled + back and to reclaim memory for events that were never flushed. */ + void free_cascade_binlog_row_events(); /*------------------------------*/ char* detailed_error; /*!< detailed error message for last error, or empty. */ diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 38c47fc3b75ee..ed05f645b0d52 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -40,6 +40,10 @@ Created 4/20/1996 Heikki Tuuri #include "log0log.h" #include "eval0eval.h" #include "data0data.h" +#include "log0recv.h" +#include "handler.h" +#include "table.h" +#include "ha_innodb.h" #include "buf0lru.h" #include "fts0fts.h" #include "fts0types.h" @@ -47,12 +51,94 @@ Created 4/20/1996 Heikki Tuuri # include "btr0sea.h" #endif #include "sql_class.h" // THD +#include +#include #ifdef WITH_WSREP #include #include #include "ha_prototypes.h" #endif /* WITH_WSREP */ +TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len, + const char *table, size_t table_len); + +static bool row_ins_fk_cascade_delete_binlog_row(THD *thd, TABLE *table, + Event_log *bin_log, + binlog_cache_data *cache_data, + bool is_transactional, + ulong row_image, + const uchar *before_record, + const uchar *after_record + __attribute__((unused))) +{ + return thd_binlog_delete_row(thd, table, bin_log, cache_data, + (int) is_transactional, row_image, + before_record); +} + +static bool row_ins_fk_cascade_update_binlog_row(THD *thd, TABLE *table, + Event_log *bin_log, + binlog_cache_data *cache_data, + bool is_transactional, + ulong row_image, + const uchar *before_record, + const uchar *after_record) +{ + return thd_binlog_update_row(thd, table, bin_log, cache_data, + (int) is_transactional, row_image, + before_record, after_record); +} + +static TABLE* +row_ins_find_open_table_for_cascade_binlog( + trx_t* trx, + dict_table_t* child) +{ + THD* thd; + TABLE* mysql_table; + char db_buf[NAME_LEN + 1]; + char tbl_buf[NAME_LEN + 1]; + ulint db_buf_len; + ulint tbl_buf_len; + + thd = trx->mysql_thd; + if (thd == NULL) { + return NULL; + } + + if (!child->parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len)) { + return NULL; + } + + mysql_table = find_fk_open_table(thd, + db_buf, db_buf_len, + tbl_buf, tbl_buf_len); + + return mysql_table; +} + +static inline bool +row_ins_allow_fk_cascade_binlog_for_table(const TABLE* table) +{ + if (table == NULL) { + return false; + } + + if (table->s->primary_key != MAX_KEY) { + return true; + } + + if (Field **vf = table->vfield) { + for (; *vf; vf++) { + if ((*vf)->flags & PART_KEY_FLAG) { + return false; + } + } + } + + return true; +} + /************************************************************************* IMPORTANT NOTE: Any operation that generates redo MUST check that there is enough space in the redo log before for that operation. This is @@ -269,7 +355,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_ins_clust_index_entry_by_modify( /*================================*/ - btr_pcur_t* pcur, /*!< in/out: a persistent cursor pointing + btr_pcur_t* pcur, /*!< in/out: a persistent cursor pointing to the clust_rec that is being modified. */ ulint flags, /*!< in: undo logging and locking flags */ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, @@ -299,7 +385,6 @@ row_ins_clust_index_entry_by_modify( /* In delete-marked records, DB_TRX_ID must always refer to an existing undo log record. */ ut_ad(rec_get_trx_id(rec, cursor->index())); - /* Build an update vector containing all the fields to be modified; NOTE that this vector may NOT contain system columns trx_id or roll_ptr */ @@ -1010,6 +1095,13 @@ row_ins_foreign_check_on_constraint( mem_heap_t* tmp_heap = NULL; doc_id_t doc_id = FTS_NULL_DOC_ID; + TABLE* child_mysql_table = NULL; + byte* before_mysql_rec = NULL; + byte* after_mysql_rec = NULL; + bool need_cascade_binlog = false; + bool can_cascade_binlog = false; + bool have_after_image = false; + DBUG_ENTER("row_ins_foreign_check_on_constraint"); trx = thr_get_trx(thr); @@ -1344,15 +1436,214 @@ row_ins_foreign_check_on_constraint( cascade->state = UPD_NODE_UPDATE_CLUSTERED; + if (thd_rpl_use_binlog_events_for_fk_cascade(trx->mysql_thd)) { + + child_mysql_table = row_ins_find_open_table_for_cascade_binlog(trx, table); + if (child_mysql_table != NULL) { + handler* file = child_mysql_table->file; + const bool allow_rpl_fk_cascade_binlog= + row_ins_allow_fk_cascade_binlog_for_table(child_mysql_table); + const bool emulate_binlog= +#ifdef WITH_WSREP + wsrep_emulate_binlog(trx->mysql_thd); +#else + false; +#endif + if (child_mysql_table->in_use == trx->mysql_thd + && (emulate_binlog || + thd_is_current_stmt_binlog_format_row(trx->mysql_thd)) + && ((emulate_binlog) || + (thd_rpl_use_binlog_events_for_fk_cascade(trx->mysql_thd) && + allow_rpl_fk_cascade_binlog)) + && file->prepare_for_row_logging()) { + need_cascade_binlog = true; + before_mysql_rec = static_cast( + mem_heap_alloc(tmp_heap, child_mysql_table->s->reclength)); + if (cascade->is_delete != PLAIN_DELETE) { + after_mysql_rec = static_cast( + mem_heap_alloc(tmp_heap, child_mysql_table->s->reclength)); + } + } + } + + if (need_cascade_binlog) { + ha_innobase* ib = static_cast(child_mysql_table->file); + row_prebuilt_t* prebuilt = ib->innobase_prebuilt(); + if (prebuilt != NULL && prebuilt->mysql_template != NULL) { + MY_BITMAP* old_read_set = child_mysql_table->read_set; + MY_BITMAP* old_write_set = child_mysql_table->write_set; + MY_BITMAP* old_rpl_write_set = child_mysql_table->rpl_write_set; + child_mysql_table->column_bitmaps_set_no_signal( + &child_mysql_table->tmp_set, &child_mysql_table->tmp_set); + bitmap_set_all(&child_mysql_table->tmp_set); + if (Field **vf = child_mysql_table->vfield) { + for (; *vf; vf++) { + bitmap_clear_bit(&child_mysql_table->tmp_set, (*vf)->field_index); + } + } + if (child_mysql_table->rpl_write_set == NULL) { + child_mysql_table->rpl_write_set = &child_mysql_table->tmp_set; + } + ib->rebuild_template_for_cascade_binlog_row_image(); + + mtr_start(mtr); + if (cascade->pcur->restore_position(BTR_SEARCH_LEAF, mtr) + == btr_pcur_t::SAME_ALL) { + const rec_t* before_rec = btr_pcur_get_rec(cascade->pcur); + mem_heap_t* offs_heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs_init(offsets_); + const rec_offs* offsets = rec_get_offsets( + before_rec, clust_index, offsets_, clust_index->n_core_fields, + ULINT_UNDEFINED, &offs_heap); + dict_index_t* saved_index = prebuilt->index; + prebuilt->index = clust_index; + if (!row_sel_store_mysql_rec(before_mysql_rec, prebuilt, + before_rec, NULL, true, + clust_index, offsets)) { + need_cascade_binlog = false; + } + prebuilt->index = saved_index; + if (UNIV_LIKELY_NULL(offs_heap)) { + mem_heap_free(offs_heap); + } + } else { + need_cascade_binlog = false; + } + mtr_commit(mtr); + + child_mysql_table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + child_mysql_table->rpl_write_set = old_rpl_write_set; + ib->reset_template_for_cascade_binlog_row_image(); + } else { + need_cascade_binlog = false; + } + } + } err = row_update_cascade_for_mysql(thr, cascade, foreign->foreign_table); mtr_start(mtr); + can_cascade_binlog = (err == DB_SUCCESS && need_cascade_binlog); + have_after_image = false; + + if (can_cascade_binlog && cascade->is_delete != PLAIN_DELETE) { + if (cascade->pcur->restore_position(BTR_SEARCH_LEAF, mtr) + == btr_pcur_t::SAME_ALL) { + const rec_t* after_rec = btr_pcur_get_rec(cascade->pcur); + if (page_rec_is_user_rec(after_rec) + && !rec_get_deleted_flag(after_rec, + dict_table_is_comp(table))) { + ha_innobase* ib = static_cast(child_mysql_table->file); + row_prebuilt_t* prebuilt = ib->innobase_prebuilt(); + if (prebuilt != NULL && prebuilt->mysql_template != NULL) { + MY_BITMAP* old_read_set = child_mysql_table->read_set; + MY_BITMAP* old_write_set = child_mysql_table->write_set; + MY_BITMAP* old_rpl_write_set = child_mysql_table->rpl_write_set; + child_mysql_table->column_bitmaps_set_no_signal( + &child_mysql_table->tmp_set, &child_mysql_table->tmp_set); + bitmap_set_all(&child_mysql_table->tmp_set); + if (Field **vf = child_mysql_table->vfield) { + for (; *vf; vf++) { + bitmap_clear_bit(&child_mysql_table->tmp_set, (*vf)->field_index); + } + } + if (child_mysql_table->rpl_write_set == NULL) { + child_mysql_table->rpl_write_set = &child_mysql_table->tmp_set; + } + ib->rebuild_template_for_cascade_binlog_row_image(); + + mem_heap_t* offs_heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs_init(offsets_); + const rec_offs* offsets = rec_get_offsets( + after_rec, clust_index, offsets_, + clust_index->n_core_fields, + ULINT_UNDEFINED, &offs_heap); + dict_index_t* saved_index = prebuilt->index; + prebuilt->index = clust_index; + have_after_image = row_sel_store_mysql_rec(after_mysql_rec, prebuilt, + after_rec, NULL, true, + clust_index, offsets); + prebuilt->index = saved_index; + child_mysql_table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + child_mysql_table->rpl_write_set = old_rpl_write_set; + ib->reset_template_for_cascade_binlog_row_image(); + if (UNIV_LIKELY_NULL(offs_heap)) { + mem_heap_free(offs_heap); + } + } + } + } + } + /* Restore pcur position */ if (pcur->restore_position(BTR_SEARCH_LEAF, mtr) - != btr_pcur_t::SAME_ALL) { + != btr_pcur_t::SAME_ALL && err == DB_SUCCESS) { + err = DB_CORRUPTION; + } + + mtr_commit(mtr); + + if (can_cascade_binlog + && (cascade->is_delete == PLAIN_DELETE || have_after_image)) { + handler* file = child_mysql_table->file; + if (cascade->is_delete == PLAIN_DELETE) { + thd_binlog_mark_fk_cascade_events(trx->mysql_thd); + MY_BITMAP* old_read_set = child_mysql_table->read_set; + MY_BITMAP* old_write_set = child_mysql_table->write_set; + MY_BITMAP* old_rpl_write_set = child_mysql_table->rpl_write_set; + + child_mysql_table->column_bitmaps_set_no_signal( + &child_mysql_table->tmp_set, &child_mysql_table->tmp_set); + bitmap_set_all(&child_mysql_table->tmp_set); + if (Field **vf = child_mysql_table->vfield) { + for (; *vf; vf++) { + bitmap_clear_bit(&child_mysql_table->tmp_set, (*vf)->field_index); + } + } + if (child_mysql_table->rpl_write_set == NULL) { + child_mysql_table->rpl_write_set = &child_mysql_table->tmp_set; + } + Log_func* log_func = row_ins_fk_cascade_delete_binlog_row; + file->binlog_log_row(before_mysql_rec, NULL, log_func); + child_mysql_table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + child_mysql_table->rpl_write_set = old_rpl_write_set; + } else { + thd_binlog_mark_fk_cascade_events(trx->mysql_thd); + const ulint len = child_mysql_table->s->reclength; + unsigned char* before_copy = static_cast( + my_malloc(PSI_INSTRUMENT_ME, len, MYF(MY_WME))); + unsigned char* after_copy = static_cast( + my_malloc(PSI_INSTRUMENT_ME, len, MYF(MY_WME))); + + if (before_copy != NULL && after_copy != NULL) { + memcpy(before_copy, before_mysql_rec, len); + memcpy(after_copy, after_mysql_rec, len); + + trx_cascade_binlog_row_event ev; + ev.table = child_mysql_table; + ev.before_record = before_copy; + ev.after_record = after_copy; + ev.log_func = reinterpret_cast( + row_ins_fk_cascade_update_binlog_row); + trx->pending_cascade_binlog_row_events.push_back(ev); + } else { + if (before_copy != NULL) { + my_free(before_copy); + } + if (after_copy != NULL) { + my_free(after_copy); + } + } + } + } + + mtr_start(mtr); + if (pcur->restore_position(BTR_SEARCH_LEAF, mtr) + != btr_pcur_t::SAME_ALL && err == DB_SUCCESS) { err = DB_CORRUPTION; } @@ -2562,7 +2853,7 @@ statement @return true if it is insert statement */ static bool thd_sql_is_insert(const THD *thd) noexcept { - switch (thd->lex->sql_command) { + switch (thd_sql_command(thd)) { case SQLCOM_INSERT: case SQLCOM_INSERT_SELECT: return true; diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 2fec0d025f9a4..cd531b6a6ce1e 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -3134,7 +3134,7 @@ be needed in the query. @retval true on success @retval false if not all columns could be retrieved */ MY_ATTRIBUTE((warn_unused_result)) -static bool row_sel_store_mysql_rec( +bool row_sel_store_mysql_rec( byte* mysql_rec, row_prebuilt_t* prebuilt, const rec_t* rec, diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index e0b1904175380..90142afc136ae 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -161,6 +161,21 @@ trx_init( #endif /* WITH_WSREP */ } +void trx_t::free_cascade_binlog_row_events() +{ + /* The before/after record images are allocated in + row_ins_foreign_check_on_constraint() and are normally freed when + ha_innobase::flush_pending_cascade_binlog() emits them at commit. + If the transaction rolls back, or the events are never flushed, + the buffers would otherwise leak, so free them here. + Note: my_free(NULL) is a no-op. */ + for (auto& ev : pending_cascade_binlog_row_events) { + my_free(ev.before_record); + my_free(ev.after_record); + } + pending_cascade_binlog_row_events.clear(); +} + /** For managing the life-cycle of the trx_t instance that we get from the pool. */ struct TrxFactory { @@ -179,6 +194,9 @@ struct TrxFactory { new(&trx->mod_tables) trx_mod_tables_t(); + new(&trx->pending_cascade_binlog_row_events) + std::vector(); + new(&trx->lock.table_locks) lock_list(); new(&trx->read_view) ReadView(); @@ -244,6 +262,9 @@ struct TrxFactory { trx->mod_tables.~trx_mod_tables_t(); + trx->free_cascade_binlog_row_events(); + trx->pending_cascade_binlog_row_events.~vector(); + ut_ad(!trx->read_view.is_open()); trx->lock.table_locks.~lock_list(); @@ -410,6 +431,8 @@ void trx_t::free() noexcept trx_sys.deregister_trx(this); check_unique_secondary= true; check_foreigns= true; + + free_cascade_binlog_row_events(); assert_freed(); trx_sys.rw_trx_hash.put_pins(this); mysql_thd= nullptr; @@ -930,7 +953,7 @@ trx_start_low( /* Check whether it is an AUTOCOMMIT SELECT */ if (const THD* thd = trx->mysql_thd) { trx->auto_commit = !(thd->variables.option_bits - & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && thd->lex->sql_command == SQLCOM_SELECT; trx->read_only = (!trx->dict_operation && thd->tx_read_only) || srv_read_only_mode;