diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h index 5c2a7311dd27e..a03dddb0dd195 100644 --- a/include/mysql/plugin.h +++ b/include/mysql/plugin.h @@ -697,6 +697,7 @@ int thd_in_lock_tables(const MYSQL_THD thd); int thd_tablespace_op(const MYSQL_THD thd); long long thd_test_options(const MYSQL_THD thd, long long test_options); int thd_sql_command(const MYSQL_THD thd); + struct DDL_options_st; struct DDL_options_st *thd_ddl_options(const MYSQL_THD thd); void thd_storage_lock_wait(MYSQL_THD thd, long long value); diff --git a/include/mysql/service_thd_binlog.h b/include/mysql/service_thd_binlog.h new file mode 100644 index 0000000000000..912f4213ee662 --- /dev/null +++ b/include/mysql/service_thd_binlog.h @@ -0,0 +1,47 @@ +/* Copyright (c) 2026, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct TABLE; +class Event_log; +class binlog_cache_data; + +int thd_is_current_stmt_binlog_format_row(const MYSQL_THD thd); + +int thd_rpl_use_binlog_events_for_fk_cascade(const MYSQL_THD thd); + +void thd_binlog_mark_fk_cascade_events(MYSQL_THD thd); + +int thd_binlog_update_row(MYSQL_THD thd, struct TABLE *table, + class Event_log *bin_log, + class binlog_cache_data *cache_data, + int is_trans, unsigned long row_image, + const unsigned char *before_record, + const unsigned char *after_record); + +int thd_binlog_delete_row(MYSQL_THD thd, struct TABLE *table, + class Event_log *bin_log, + class binlog_cache_data *cache_data, + int is_trans, unsigned long row_image, + const unsigned char *before_record); + +#ifdef __cplusplus +} +#endif diff --git a/include/mysql/service_wsrep.h b/include/mysql/service_wsrep.h index 8c001ca147063..9d0114ac04f0e 100644 --- a/include/mysql/service_wsrep.h +++ b/include/mysql/service_wsrep.h @@ -153,6 +153,7 @@ extern struct wsrep_service_st { #define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I) #define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T) #define wsrep_get_domain_id(T) wsrep_service->wsrep_get_domain_id_func(T) +#define wsrep_emulate_binlog(T) wsrep_service->wsrep_emulate_binlog_func(T) #else #define MYSQL_SERVICE_WSREP_STATIC_INCLUDED @@ -265,5 +266,6 @@ extern "C" void wsrep_report_bf_lock_wait(const THD *thd, /* declare parallel applying unsafety for the THD */ extern "C" void wsrep_thd_set_PA_unsafe(MYSQL_THD thd); extern "C" uint32 wsrep_get_domain_id(); +extern "C" my_bool wsrep_emulate_binlog(const MYSQL_THD thd); #endif #endif /* MYSQL_SERVICE_WSREP_INCLUDED */ diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index 623311c170616..04cf3a8be387d 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -1389,6 +1389,9 @@ The following specify which files/extra groups are read (specified before remain play when stop slave is executed --rpl-semi-sync-slave-trace-level=# The tracing level for semi-sync replication + --rpl-use-binlog-events-for-fk-cascade + If enabled, the master will write row events for foreign + key cascade operations --safe-mode Skip some optimize stages (for testing). Deprecated, will be removed in a future release. --safe-user-create Don't allow new user creation by the user who has no @@ -2096,6 +2099,7 @@ rpl-semi-sync-slave-delay-master FALSE rpl-semi-sync-slave-enabled FALSE rpl-semi-sync-slave-kill-conn-timeout 5 rpl-semi-sync-slave-trace-level 32 +rpl-use-binlog-events-for-fk-cascade FALSE safe-user-create FALSE secure-file-priv (No default value) secure-timestamp NO diff --git a/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row.result b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row.result new file mode 100644 index 0000000000000..5eb0ef025a8bf --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row.result @@ -0,0 +1,85 @@ +include/master-slave.inc +[connection master] +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE CASCADE +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +2 +connection master; +DELETE FROM p WHERE id=2; +connection slave; +connection slave; +SELECT COUNT(*) FROM p; +COUNT(*) +0 +SELECT COUNT(*) FROM c; +COUNT(*) +0 +connection master; +FLUSH BINARY LOGS; +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_binlog_row_off.sql +NOT FOUND /### DELETE FROM `test`.`c`/ in fk_cascade_binlog_row_off.sql +DROP TABLE c, p; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE CASCADE +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +2 +connection master; +DELETE FROM p WHERE id=2; +connection slave; +connection slave; +SELECT COUNT(*) FROM p; +COUNT(*) +0 +SELECT COUNT(*) FROM c; +COUNT(*) +0 +connection master; +FLUSH BINARY LOGS; +FOUND 1 /### UPDATE `test`.`c`/ in fk_cascade_binlog_row_on.sql +FOUND 1 /### DELETE FROM `test`.`c`/ in fk_cascade_binlog_row_on.sql +DROP TABLE c, p; +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row_rollback.result b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row_rollback.result new file mode 100644 index 0000000000000..25eee590001fa --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_fk_cascade_binlog_row_rollback.result @@ -0,0 +1,140 @@ +include/master-slave.inc +[connection master] +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE CASCADE +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +connection slave; +connection master; +# +# Phase 1: full transaction ROLLBACK +# +FLUSH BINARY LOGS; +BEGIN; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK; +# master: parent and child unchanged +SELECT * FROM p ORDER BY id; +id v +1 10 +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection slave; +connection slave; +# slave: child unchanged +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection master; +FLUSH BINARY LOGS; +# no cascade child row event must be present +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_rollback_full.sql +# +# Phase 2: ROLLBACK TO SAVEPOINT (surviving work before the savepoint) +# +FLUSH BINARY LOGS; +BEGIN; +INSERT INTO p VALUES (3, 30); +SAVEPOINT sp1; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK TO SAVEPOINT sp1; +COMMIT; +# master: cascade undone, INSERT before savepoint kept +SELECT * FROM p ORDER BY id; +id v +1 10 +3 30 +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection slave; +connection slave; +# slave: child unchanged, parent has surviving row +SELECT * FROM p ORDER BY id; +id v +1 10 +3 30 +SELECT * FROM c ORDER BY id; +id pid v +100 1 20 +connection master; +FLUSH BINARY LOGS; +# cascade child row event must be absent +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_rollback_sp.sql +# surviving INSERT before the savepoint must be replicated +FOUND 1 /### INSERT INTO `test`.`p`/ in fk_cascade_rollback_sp.sql +DROP TABLE c, p; +# +# Phase 3: statement rollback, transaction continues and commits +# +CREATE TABLE p ( +id INT PRIMARY KEY, +u INT, +UNIQUE KEY uq_u(u) +) ENGINE=InnoDB; +CREATE TABLE c ( +cid INT PRIMARY KEY, +pid INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON UPDATE CASCADE +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 100), (2, 200); +INSERT INTO c VALUES (11, 1); +connection slave; +connection master; +FLUSH BINARY LOGS; +BEGIN; +UPDATE p SET id=id+10, u=100 WHERE id IN (1,2) ORDER BY id; +ERROR 23000: Duplicate entry '100' for key 'uq_u' +INSERT INTO p VALUES (9, 900); +COMMIT; +# master: failed UPDATE rolled back, INSERT committed +SELECT * FROM p ORDER BY id; +id u +1 100 +2 200 +9 900 +SELECT * FROM c ORDER BY cid; +cid pid +11 1 +connection slave; +connection slave; +# slave: child unchanged, parent has committed row +SELECT * FROM p ORDER BY id; +id u +1 100 +2 200 +9 900 +SELECT * FROM c ORDER BY cid; +cid pid +11 1 +connection master; +FLUSH BINARY LOGS; +# discarded cascade child row event must be absent +NOT FOUND /### UPDATE `test`.`c`/ in fk_cascade_rollback_stmt.sql +# committed INSERT must be replicated +FOUND 1 /### INSERT INTO `test`.`p`/ in fk_cascade_rollback_stmt.sql +DROP TABLE c, p; +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/r/rpl_fk_set_null_binlog_row.result b/mysql-test/suite/rpl/r/rpl_fk_set_null_binlog_row.result new file mode 100644 index 0000000000000..43a6c65f91b69 --- /dev/null +++ b/mysql-test/suite/rpl/r/rpl_fk_set_null_binlog_row.result @@ -0,0 +1,61 @@ +include/master-slave.inc +[connection master] +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT NULL, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE SET NULL +ON UPDATE SET NULL +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +NULL +connection master; +FLUSH BINARY LOGS; +NOT FOUND /### UPDATE `test`.`c`/ in fk_set_null_binlog_row_off.sql +DROP TABLE c, p; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; +CREATE TABLE p ( +id INT PRIMARY KEY, +v INT +) ENGINE=InnoDB; +CREATE TABLE c ( +id INT PRIMARY KEY, +pid INT NULL, +v INT, +KEY(pid), +CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) +ON DELETE SET NULL +ON UPDATE SET NULL +) ENGINE=InnoDB; +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); +UPDATE p SET id=2 WHERE id=1; +connection slave; +SELECT pid FROM c ORDER BY id; +pid +NULL +connection master; +FLUSH BINARY LOGS; +FOUND 1 /### UPDATE `test`.`c`/ in fk_set_null_binlog_row_on.sql +DROP TABLE c, p; +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row.test b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row.test new file mode 100644 index 0000000000000..73d083a66dda5 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row.test @@ -0,0 +1,133 @@ +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_binlog_format_row.inc + +--source include/master-slave.inc + +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; + +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; + +# Phase A: feature OFF (no explicit FK cascade row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE CASCADE + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +connection slave; +SELECT pid FROM c ORDER BY id; + +connection master; +DELETE FROM p WHERE id=2; + +sync_slave_with_master; +connection slave; +SELECT COUNT(*) FROM p; +SELECT COUNT(*) FROM c; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--let SEARCH_PATTERN= ### DELETE FROM `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +# Phase B: feature ON (explicit FK cascade row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE CASCADE + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +connection slave; +SELECT pid FROM c ORDER BY id; + +connection master; +DELETE FROM p WHERE id=2; + +sync_slave_with_master; +connection slave; +SELECT COUNT(*) FROM p; +SELECT COUNT(*) FROM c; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql +--let SEARCH_ABORT= NOT FOUND|FOUND 0|FOUND [2-9] +--source include/search_pattern_in_file.inc + +--let SEARCH_PATTERN= ### DELETE FROM `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql +--let SEARCH_ABORT= NOT FOUND|FOUND 0|FOUND [2-9] +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; + +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_off.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_binlog_row_on.sql + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row_rollback.test b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row_rollback.test new file mode 100644 index 0000000000000..b28060718ece0 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_fk_cascade_binlog_row_rollback.test @@ -0,0 +1,201 @@ +# +# MDEV-38243 +# +# When rpl_use_binlog_events_for_fk_cascade is enabled the master collects +# row events for FK cascade operations. If the row changes those events +# describe are rolled back, the events must be discarded (freed) and must not +# end up in the binary log. This test exercises the three rollback paths that +# discard the queued cascade events: +# +# 1. full transaction ROLLBACK (ha_rollback_trans, all=true) +# 2. ROLLBACK TO SAVEPOINT (ha_rollback_to_savepoint) +# 3. statement rollback, transaction kept (ha_rollback_trans, all=false) +# +# In every case the cascaded child row event (### UPDATE `test`.`c`) must be +# absent from the binary log, the master and slave data must stay consistent, +# and any surviving work in the same transaction must still replicate. +# +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_binlog_format_row.inc + +--source include/master-slave.inc + +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; + +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE CASCADE + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +sync_slave_with_master; +connection master; + +--echo # +--echo # Phase 1: full transaction ROLLBACK +--echo # +FLUSH BINARY LOGS; +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) + +BEGIN; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK; + +--echo # master: parent and child unchanged +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY id; + +sync_slave_with_master; +connection slave; +--echo # slave: child unchanged +SELECT * FROM c ORDER BY id; + +connection master; +FLUSH BINARY LOGS; +--let $datadir = `SELECT @@datadir` +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_full.sql + +--echo # no cascade child row event must be present +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_full.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--echo # +--echo # Phase 2: ROLLBACK TO SAVEPOINT (surviving work before the savepoint) +--echo # +FLUSH BINARY LOGS; +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) + +BEGIN; +INSERT INTO p VALUES (3, 30); +SAVEPOINT sp1; +UPDATE p SET id=2 WHERE id=1; +ROLLBACK TO SAVEPOINT sp1; +COMMIT; + +--echo # master: cascade undone, INSERT before savepoint kept +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY id; + +sync_slave_with_master; +connection slave; +--echo # slave: child unchanged, parent has surviving row +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY id; + +connection master; +FLUSH BINARY LOGS; +--let $datadir = `SELECT @@datadir` +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql + +--echo # cascade child row event must be absent +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--echo # surviving INSERT before the savepoint must be replicated +--let SEARCH_PATTERN= ### INSERT INTO `test`.`p` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql +--let SEARCH_ABORT= NOT FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +--echo # +--echo # Phase 3: statement rollback, transaction continues and commits +--echo # +# The parent has a secondary UNIQUE key. A multi-row UPDATE cascades on the +# first row (queuing a child cascade event) and then hits a duplicate-key +# error on the second row, which rolls back the statement. The transaction +# stays open and commits later work. The discarded cascade event must not be +# flushed into the binary log at COMMIT. +CREATE TABLE p ( + id INT PRIMARY KEY, + u INT, + UNIQUE KEY uq_u(u) +) ENGINE=InnoDB; + +CREATE TABLE c ( + cid INT PRIMARY KEY, + pid INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON UPDATE CASCADE +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 100), (2, 200); +INSERT INTO c VALUES (11, 1); + +sync_slave_with_master; +connection master; + +FLUSH BINARY LOGS; +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) + +BEGIN; +# row id=1: id 1->11 cascades c.pid 1->11 (event queued), u 100->100 (no change) +# row id=2: u 200->100 duplicates row 1's u -> ER_DUP_ENTRY -> statement rollback +--error ER_DUP_ENTRY +UPDATE p SET id=id+10, u=100 WHERE id IN (1,2) ORDER BY id; +INSERT INTO p VALUES (9, 900); +COMMIT; + +--echo # master: failed UPDATE rolled back, INSERT committed +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY cid; + +sync_slave_with_master; +connection slave; +--echo # slave: child unchanged, parent has committed row +SELECT * FROM p ORDER BY id; +SELECT * FROM c ORDER BY cid; + +connection master; +FLUSH BINARY LOGS; +--let $datadir = `SELECT @@datadir` +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql + +--echo # discarded cascade child row event must be absent +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +--echo # committed INSERT must be replicated +--let SEARCH_PATTERN= ### INSERT INTO `test`.`p` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql +--let SEARCH_ABORT= NOT FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; + +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_full.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_sp.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_cascade_rollback_stmt.sql + +--source include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/rpl_fk_set_null_binlog_row.test b/mysql-test/suite/rpl/t/rpl_fk_set_null_binlog_row.test new file mode 100644 index 0000000000000..42c7278e45ee3 --- /dev/null +++ b/mysql-test/suite/rpl/t/rpl_fk_set_null_binlog_row.test @@ -0,0 +1,105 @@ +--source include/have_innodb.inc +--source include/have_log_bin.inc +--source include/have_binlog_format_row.inc + +--source include/master-slave.inc + +SET @old_binlog_format := @@session.binlog_format; +SET @old_default_storage_engine := @@session.default_storage_engine; +SET @old_rpl_use_binlog_events_for_fk_cascade := @@session.rpl_use_binlog_events_for_fk_cascade; + +SET SESSION default_storage_engine=InnoDB; +SET SESSION binlog_format=ROW; + +# Phase A: feature OFF (no explicit FK SET NULL row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=0; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT NULL, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE SET NULL + ON UPDATE SET NULL +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +SELECT pid FROM c ORDER BY id; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_off.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_off.sql +--let SEARCH_ABORT= FOUND +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +# Phase B: feature ON (explicit FK SET NULL row events in binlog) +SET SESSION rpl_use_binlog_events_for_fk_cascade=1; + +CREATE TABLE p ( + id INT PRIMARY KEY, + v INT +) ENGINE=InnoDB; + +CREATE TABLE c ( + id INT PRIMARY KEY, + pid INT NULL, + v INT, + KEY(pid), + CONSTRAINT fk FOREIGN KEY (pid) REFERENCES p(id) + ON DELETE SET NULL + ON UPDATE SET NULL +) ENGINE=InnoDB; + +INSERT INTO p VALUES (1, 10); +INSERT INTO c VALUES (100, 1, 20); + +UPDATE p SET id=2 WHERE id=1; + +sync_slave_with_master; +SELECT pid FROM c ORDER BY id; + +connection master; + +--let $binlog = query_get_value(SHOW MASTER STATUS, File, 1) +--let $datadir = `SELECT @@datadir` + +FLUSH BINARY LOGS; + +--exec $MYSQL_BINLOG --verbose --verbose --base64-output=DECODE-ROWS $datadir/$binlog > $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_on.sql + +--let SEARCH_PATTERN= ### UPDATE `test`.`c` +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_on.sql +--let SEARCH_ABORT= NOT FOUND|FOUND 0|FOUND [2-9] +--source include/search_pattern_in_file.inc + +DROP TABLE c, p; + +SET SESSION default_storage_engine=@old_default_storage_engine; +SET SESSION binlog_format=@old_binlog_format; +SET SESSION rpl_use_binlog_events_for_fk_cascade=@old_rpl_use_binlog_events_for_fk_cascade; + +--remove_file $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_off.sql +--remove_file $MYSQLTEST_VARDIR/tmp/fk_set_null_binlog_row_on.sql + +--source include/rpl_end.inc diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 481ca0b5f4030..3c7097a0a5245 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -3462,6 +3462,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME RPL_USE_BINLOG_EVENTS_FOR_FK_CASCADE +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT If enabled, the master will write row events for foreign key cascade operations +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME SECURE_FILE_PRIV VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index 579dae7af5aab..eed0d7feb7f03 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -4032,6 +4032,16 @@ NUMERIC_BLOCK_SIZE 1 ENUM_VALUE_LIST NULL READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED +VARIABLE_NAME RPL_USE_BINLOG_EVENTS_FOR_FK_CASCADE +VARIABLE_SCOPE SESSION +VARIABLE_TYPE BOOLEAN +VARIABLE_COMMENT If enabled, the master will write row events for foreign key cascade operations +NUMERIC_MIN_VALUE NULL +NUMERIC_MAX_VALUE NULL +NUMERIC_BLOCK_SIZE NULL +ENUM_VALUE_LIST OFF,ON +READ_ONLY NO +COMMAND_LINE_ARGUMENT OPTIONAL VARIABLE_NAME SECURE_FILE_PRIV VARIABLE_SCOPE GLOBAL VARIABLE_TYPE VARCHAR diff --git a/sql/handler.cc b/sql/handler.cc index 12dc4240c63fb..bfb2807370691 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -118,6 +118,30 @@ static handlerton *installed_htons[128]; KEY_CREATE_INFO default_key_create_info= { HA_KEY_ALG_UNDEF, 0, 0, {NullS, 0}, {NullS, 0}, false }; +static void flush_pending_cascade_binlog_for_thd(THD *thd) +{ + if (!thd || thd->rgi_slave) return; + + TABLE *table; + for (table = thd->open_tables; table; table = table->next) { + if (table->file) { + table->file->flush_pending_cascade_binlog(); + } + } +} + +static void discard_pending_cascade_binlog_for_thd(THD *thd) +{ + if (!thd || thd->rgi_slave) return; + + TABLE *table; + for (table = thd->open_tables; table; table = table->next) { + if (table->file) { + table->file->discard_pending_cascade_binlog(); + } + } +} + /* number of entries in handlertons[] */ ulong total_ha= 0; /* number of storage engines (from handlertons[]) that support 2pc */ @@ -2144,6 +2168,14 @@ int ha_commit_trans(THD *thd, bool all) thd->mdl_context.release_lock(thd->backup_commit_lock); thd->backup_commit_lock= 0; } + + if (!error) + { + if ((thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd)) && + (all || thd->in_active_multi_stmt_transaction())) + flush_pending_cascade_binlog_for_thd(thd); + } #ifdef WITH_WSREP if (wsrep_is_active(thd) && is_real_trans && !error && (rw_ha_count == 0 || all) && @@ -2337,12 +2369,11 @@ int ha_rollback_trans(THD *thd, bool all) rollback without signalling following transactions. And in release builds, we explicitly do the signalling before rolling back. */ - DBUG_ASSERT( - !(thd->rgi_slave && - !thd->rgi_slave->worker_error && - thd->rgi_slave->did_mark_start_commit) || - (thd->transaction->xid_state.is_explicit_XA() || - (thd->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_PREPARED_XA))); + if (!(thd->rgi_slave && thd->rgi_slave->worker_error)) + DBUG_ASSERT( + !(thd->rgi_slave && thd->rgi_slave->did_mark_start_commit) || + (thd->transaction->xid_state.is_explicit_XA() || + (thd->rgi_slave->gtid_ev_flags2 & Gtid_log_event::FL_PREPARED_XA))); if (thd->rgi_slave && !thd->rgi_slave->worker_error && @@ -2424,6 +2455,19 @@ int ha_rollback_trans(THD *thd, bool all) binlog_post_rollback(thd, all); } + /* + The engines have rolled back (the whole transaction when all==true, or the + current statement to its implicit savepoint when all==false). Any queued + FK-cascade row events describe row changes that were just undone, so + discard and free them rather than letting them be flushed into the binary + log by a later statement or at commit. For a full rollback the queue would + also be freed when the trx is released, but discarding here keeps the + statement-rollback case correct and the behaviour consistent. + */ + if (thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd)) + discard_pending_cascade_binlog_for_thd(thd); + #ifdef WITH_WSREP if (WSREP(thd) && thd->is_error()) { @@ -3344,6 +3388,18 @@ int ha_rollback_to_savepoint(THD *thd, SAVEPOINT *sv) my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), err); error=1; } + + /* + The cascade row events queued for this transaction describe row + changes that are being rolled back to the savepoint (or the queue is + empty because earlier statements already flushed their events into the + binlog cache, which the binlog savepoint machinery truncates + separately). Either way we must not write these events, so discard and + free them rather than flushing them into the binary log. + */ + if (thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd)) + discard_pending_cascade_binlog_for_thd(thd); #ifdef WITH_WSREP if (WSREP(thd) && ht->flags & HTON_WSREP_REPLICATION) { diff --git a/sql/handler.h b/sql/handler.h index f0763aebe717c..cdb1d23c3c5dd 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -5327,7 +5327,18 @@ class handler :public Sql_alloc bool check_table_binlog_row_based(); bool prepare_for_row_logging(); int prepare_for_modify(bool can_set_fields, bool can_lookup); - int binlog_log_row(const uchar *before_record, const uchar *after_record, + + virtual void flush_pending_cascade_binlog() {} + + /* + Discard FK-cascade row events queued for this transaction, + when transaction rolls back. + */ + virtual void discard_pending_cascade_binlog() {} + + int prepare_for_insert(bool do_create); + int binlog_log_row(const uchar *before_record, + const uchar *after_record, Log_func *log_func); inline void clear_cached_table_binlog_row_based_flag() diff --git a/sql/log.cc b/sql/log.cc index 387795e30fce7..2c155fda778b7 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -8068,7 +8068,27 @@ int binlog_flush_pending_rows_event(THD *thd, bool stmt_end, */ if (stmt_end) { - pending->set_flags(Rows_log_event::STMT_END_F); + if (thd->binlog_fk_cascade_events && + !thd->rgi_slave && + (thd->variables.rpl_use_binlog_events_for_fk_cascade || + WSREP_EMULATE_BINLOG(thd))) + { + TABLE *table; + for (table= thd->open_tables; table; table= table->next) + { + if (table->file) + table->file->flush_pending_cascade_binlog(); + } + } + + /* + Flushing cascaded row events may have created a new pending event or + replaced the current one. Ensure we mark the final pending event as + statement end. + */ + pending= cache_data->pending(); + if (pending) + pending->set_flags(Rows_log_event::STMT_END_F); thd->reset_binlog_for_next_statement(); } @@ -8294,6 +8314,9 @@ Event_log::prepare_pending_rows_event(THD *thd, TABLE* table, if (unlikely(!ev)) DBUG_RETURN(NULL); ev->server_id= serv_id; // I don't like this, it's too easy to forget. + + if (thd->binlog_fk_cascade_events) + ev->set_flags(Rows_log_event::FK_CASCADE_EVENTS_F); /* flush the pending event and replace it with the newly created event... diff --git a/sql/log_event.h b/sql/log_event.h index 517b2e62fd06e..60b29732e8e0a 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -4789,6 +4789,8 @@ class Rows_log_event : public Log_event */ COMPLETE_ROWS_F = (1U << 3), + FK_CASCADE_EVENTS_F = (1U << 4), + /* Value of the OPTION_NO_CHECK_CONSTRAINT_CHECKS flag in thd->options */ NO_CHECK_CONSTRAINT_CHECKS_F = (1U << 7) }; diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc index 3c8d7bea47381..7a56b850d2350 100644 --- a/sql/log_event_server.cc +++ b/sql/log_event_server.cc @@ -5051,6 +5051,9 @@ int Rows_log_event::do_apply_event(rpl_group_info *rgi) else thd->variables.option_bits&= ~OPTION_NO_CHECK_CONSTRAINT_CHECKS; + if (get_flags(FK_CASCADE_EVENTS_F)) + thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS; + /* A small test to verify that objects have consistent types */ DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS)); diff --git a/sql/service_wsrep.cc b/sql/service_wsrep.cc index 3f09e9d8b3e47..705d64921c372 100644 --- a/sql/service_wsrep.cc +++ b/sql/service_wsrep.cc @@ -441,3 +441,8 @@ extern "C" my_bool wsrep_thd_is_local_transaction(const THD *thd) return (wsrep_thd_is_local(thd) && thd->wsrep_cs().transaction().active()); } + +extern "C" my_bool wsrep_emulate_binlog(const THD *thd) +{ + return WSREP_EMULATE_BINLOG_NNULL(thd); +} diff --git a/sql/sql_class.cc b/sql/sql_class.cc index 70c571f14e6ec..b26a1024896a1 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -93,6 +93,22 @@ extern "C" const uchar *get_var_key(const void *entry_, size_t *length, return reinterpret_cast(entry->name.str); } +void THD::binlog_mark_fk_cascade_events() +{ + binlog_fk_cascade_events= true; + + if (binlog_cache_mngr *cache_mngr= binlog_get_cache_mngr()) + { + if (Rows_log_event *pending= binlog_get_pending_rows_event( + cache_mngr, use_trans_cache(this, false))) + pending->set_flags(Rows_log_event::FK_CASCADE_EVENTS_F); + + if (Rows_log_event *pending= binlog_get_pending_rows_event( + cache_mngr, use_trans_cache(this, true))) + pending->set_flags(Rows_log_event::FK_CASCADE_EVENTS_F); + } +} + extern "C" void free_user_var(void *entry_) { user_var_entry *entry= static_cast(entry_); @@ -507,6 +523,47 @@ int thd_sql_command(const THD *thd) return (int) thd->lex->sql_command; } +extern "C" +int thd_is_current_stmt_binlog_format_row(const THD *thd) +{ + return (int) thd->is_current_stmt_binlog_format_row(); +} + +extern "C" +int thd_rpl_use_binlog_events_for_fk_cascade(const THD *thd) +{ + return (int) thd->variables.rpl_use_binlog_events_for_fk_cascade; +} + +extern "C" +void thd_binlog_mark_fk_cascade_events(THD *thd) +{ + thd->binlog_mark_fk_cascade_events(); +} + +extern "C" +int thd_binlog_update_row(THD *thd, TABLE *table, Event_log *bin_log, + binlog_cache_data *cache_data, int is_trans, + unsigned long row_image, + const unsigned char *before_record, + const unsigned char *after_record) +{ + return thd->binlog_update_row(table, bin_log, cache_data, (bool) is_trans, + (enum_binlog_row_image) row_image, + before_record, after_record); +} + +extern "C" +int thd_binlog_delete_row(THD *thd, TABLE *table, Event_log *bin_log, + binlog_cache_data *cache_data, int is_trans, + unsigned long row_image, + const unsigned char *before_record) +{ + return thd->binlog_delete_row(table, bin_log, cache_data, (bool) is_trans, + (enum_binlog_row_image) row_image, + before_record); +} + /* Returns options used with DDL's, like IF EXISTS etc... Will returns 'nonsense' if the command was not a DDL. @@ -894,6 +951,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) mysys_var=0; binlog_evt_union.do_union= FALSE; binlog_table_maps= FALSE; + binlog_fk_cascade_events= FALSE; binlog_xid= 0; enable_slow_log= 0; durability_property= HA_REGULAR_DURABILITY; diff --git a/sql/sql_class.h b/sql/sql_class.h index aad3defa963be..4e7b9e0e12bec 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -916,6 +916,7 @@ typedef struct system_variables my_bool sql_log_bin; my_bool binlog_annotate_row_events; my_bool binlog_direct_non_trans_update; + my_bool rpl_use_binlog_events_for_fk_cascade; my_bool column_compression_zlib_wrap; my_bool sysdate_is_now; my_bool wsrep_on; @@ -3850,6 +3851,10 @@ class THD: public THD_count, /* this must be first */ /* 1 if binlog table maps has been written */ bool binlog_table_maps; + bool binlog_fk_cascade_events; + + void binlog_mark_fk_cascade_events(); + void issue_unsafe_warnings(); void reset_unsafe_warnings() { binlog_unsafe_warning_flags= 0; } @@ -3857,6 +3862,7 @@ class THD: public THD_count, /* this must be first */ void reset_binlog_for_next_statement() { binlog_table_maps= 0; + binlog_fk_cascade_events= false; } bool binlog_table_should_be_logged(const LEX_CSTRING *db); diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index e6e0579dba7b0..8f0751b37bd4b 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -781,6 +781,26 @@ Sys_binlog_create_tmptable_format( NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(binlog_create_tmp_format_check)); +static bool rpl_use_binlog_events_for_fk_cascade_check(sys_var *self, THD *thd, set_var *var) +{ + if (var->type == OPT_GLOBAL) + return false; + + if (unlikely(error_if_in_trans_or_substatement(thd, + ER_STORED_FUNCTION_PREVENTS_SWITCH_SQL_LOG_BIN, + ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SQL_LOG_BIN))) + return true; + + return false; +} + +static Sys_var_mybool Sys_rpl_use_binlog_events_for_fk_cascade( + "rpl_use_binlog_events_for_fk_cascade", + "If enabled, the master will write row events for foreign key cascade operations", + SESSION_VAR(rpl_use_binlog_events_for_fk_cascade), + CMD_LINE(OPT_ARG), DEFAULT(FALSE), + NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(rpl_use_binlog_events_for_fk_cascade_check)); + static bool deprecated_explicit_defaults_for_timestamp(sys_var *self, THD *thd, set_var *var) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c276db592dac0..4978631193f82 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -49,8 +49,10 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include #include -#include +#include +#include #include +#include #include #include "sql_type_geom.h" #include "scope.h" @@ -8873,6 +8875,82 @@ ha_innobase::update_row( DBUG_RETURN(err); } +void +ha_innobase::flush_pending_cascade_binlog() +{ + trx_t* trx = thd_to_trx(m_user_thd); + const bool emulate_binlog= +#ifdef WITH_WSREP + wsrep_emulate_binlog(m_user_thd); +#else + false; +#endif + + if (!thd_rpl_use_binlog_events_for_fk_cascade(m_user_thd) && + !emulate_binlog) { + return; + } + + if (trx == NULL || trx->pending_cascade_binlog_row_events.empty()) { + return; + } + + m_user_thd->binlog_mark_fk_cascade_events(); + + for (auto& ev : trx->pending_cascade_binlog_row_events) { + if (ev.table == NULL || ev.table->file == NULL) { + if (ev.before_record) { + my_free(ev.before_record); + } + if (ev.after_record) { + my_free(ev.after_record); + } + continue; + } + + if (ev.table->s->tmp_table != NO_TMP_TABLE) { + if (ev.before_record) { + my_free(ev.before_record); + } + if (ev.after_record) { + my_free(ev.after_record); + } + continue; + } + + MY_BITMAP* old_read_set = ev.table->read_set; + MY_BITMAP* old_write_set = ev.table->write_set; + MY_BITMAP* old_rpl_write_set = ev.table->rpl_write_set; + + ev.table->column_bitmaps_set_no_signal( + &ev.table->s->all_set, &ev.table->s->all_set); + if (ev.table->rpl_write_set == NULL) { + ev.table->rpl_write_set = &ev.table->s->all_set; + } + + Log_func* log_func = reinterpret_cast(ev.log_func); + ev.table->file->binlog_log_row(ev.before_record, + ev.after_record, + log_func); + + ev.table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + ev.table->rpl_write_set = old_rpl_write_set; + + my_free(ev.before_record); + my_free(ev.after_record); + } + + trx->pending_cascade_binlog_row_events.clear(); +} + +void +ha_innobase::discard_pending_cascade_binlog() +{ + if (trx_t* trx = thd_to_trx(m_user_thd)) { + trx->free_cascade_binlog_row_events(); + } +} + /**********************************************************************//** Deletes a row given as the parameter. @return error number or 0 */ diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 93c442b2e72ea..6d8076210d3d4 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -121,6 +121,10 @@ class ha_innobase final : public handler int update_row(const uchar * old_data, const uchar * new_data) override; + void flush_pending_cascade_binlog() override; + + void discard_pending_cascade_binlog() override; + int delete_row(const uchar * buf) override; bool was_semi_consistent_read() override; @@ -515,6 +519,20 @@ class ha_innobase final : public handler /** Save CPU time with prebuilt/cached data structures */ row_prebuilt_t* m_prebuilt; +public: + row_prebuilt_t* innobase_prebuilt() const { return m_prebuilt; } + + void rebuild_template_for_cascade_binlog_row_image() + { + reset_template(); + build_template(true); + } + + void reset_template_for_cascade_binlog_row_image() + { + reset_template(); + } + /** Thread handle of the user currently using the handler; this is set in external_lock function */ THD* m_user_thd; diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h index 35e3cbe66315c..243333eed8f92 100644 --- a/storage/innobase/include/row0sel.h +++ b/storage/innobase/include/row0sel.h @@ -27,7 +27,7 @@ Created 12/19/1997 Heikki Tuuri #pragma once #include "data0data.h" -#include "que0types.h" +#include "dict0dict.h" #include "trx0types.h" #include "read0types.h" #include "row0types.h" @@ -36,6 +36,16 @@ Created 12/19/1997 Heikki Tuuri #include "btr0pcur.h" #include "row0mysql.h" +MY_ATTRIBUTE((warn_unused_result)) +bool row_sel_store_mysql_rec( + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, + const dtuple_t* vrow, + bool rec_clust, + const dict_index_t* index, + const rec_offs* offsets); + /*********************************************************************//** Creates a select node struct. @return own: select node struct */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index bef9311b81b32..5dc88011f9cbe 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -44,6 +44,14 @@ Created 3/26/1996 Heikki Tuuri struct mtr_t; struct rw_trx_hash_element_t; class ha_handler_stats; +struct TABLE; + +struct trx_cascade_binlog_row_event { + TABLE* table; + unsigned char* before_record; + unsigned char* after_record; + void* log_func; +}; /******************************************************************//** Set detailed error message for the transaction. */ @@ -962,6 +970,13 @@ struct trx_t : ilist_node<> transaction branch */ trx_mod_tables_t mod_tables; /*!< List of tables that were modified by this transaction */ + + std::vector pending_cascade_binlog_row_events; + + /** Free the events of any queued FK-cascade binlog row events and empty + the list. Used both to discard events whose row changes are being rolled + back and to reclaim memory for events that were never flushed. */ + void free_cascade_binlog_row_events(); /*------------------------------*/ char* detailed_error; /*!< detailed error message for last error, or empty. */ diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 38c47fc3b75ee..ed05f645b0d52 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -40,6 +40,10 @@ Created 4/20/1996 Heikki Tuuri #include "log0log.h" #include "eval0eval.h" #include "data0data.h" +#include "log0recv.h" +#include "handler.h" +#include "table.h" +#include "ha_innodb.h" #include "buf0lru.h" #include "fts0fts.h" #include "fts0types.h" @@ -47,12 +51,94 @@ Created 4/20/1996 Heikki Tuuri # include "btr0sea.h" #endif #include "sql_class.h" // THD +#include +#include #ifdef WITH_WSREP #include #include #include "ha_prototypes.h" #endif /* WITH_WSREP */ +TABLE *find_fk_open_table(THD *thd, const char *db, size_t db_len, + const char *table, size_t table_len); + +static bool row_ins_fk_cascade_delete_binlog_row(THD *thd, TABLE *table, + Event_log *bin_log, + binlog_cache_data *cache_data, + bool is_transactional, + ulong row_image, + const uchar *before_record, + const uchar *after_record + __attribute__((unused))) +{ + return thd_binlog_delete_row(thd, table, bin_log, cache_data, + (int) is_transactional, row_image, + before_record); +} + +static bool row_ins_fk_cascade_update_binlog_row(THD *thd, TABLE *table, + Event_log *bin_log, + binlog_cache_data *cache_data, + bool is_transactional, + ulong row_image, + const uchar *before_record, + const uchar *after_record) +{ + return thd_binlog_update_row(thd, table, bin_log, cache_data, + (int) is_transactional, row_image, + before_record, after_record); +} + +static TABLE* +row_ins_find_open_table_for_cascade_binlog( + trx_t* trx, + dict_table_t* child) +{ + THD* thd; + TABLE* mysql_table; + char db_buf[NAME_LEN + 1]; + char tbl_buf[NAME_LEN + 1]; + ulint db_buf_len; + ulint tbl_buf_len; + + thd = trx->mysql_thd; + if (thd == NULL) { + return NULL; + } + + if (!child->parse_name(db_buf, tbl_buf, &db_buf_len, &tbl_buf_len)) { + return NULL; + } + + mysql_table = find_fk_open_table(thd, + db_buf, db_buf_len, + tbl_buf, tbl_buf_len); + + return mysql_table; +} + +static inline bool +row_ins_allow_fk_cascade_binlog_for_table(const TABLE* table) +{ + if (table == NULL) { + return false; + } + + if (table->s->primary_key != MAX_KEY) { + return true; + } + + if (Field **vf = table->vfield) { + for (; *vf; vf++) { + if ((*vf)->flags & PART_KEY_FLAG) { + return false; + } + } + } + + return true; +} + /************************************************************************* IMPORTANT NOTE: Any operation that generates redo MUST check that there is enough space in the redo log before for that operation. This is @@ -269,7 +355,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_ins_clust_index_entry_by_modify( /*================================*/ - btr_pcur_t* pcur, /*!< in/out: a persistent cursor pointing + btr_pcur_t* pcur, /*!< in/out: a persistent cursor pointing to the clust_rec that is being modified. */ ulint flags, /*!< in: undo logging and locking flags */ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, @@ -299,7 +385,6 @@ row_ins_clust_index_entry_by_modify( /* In delete-marked records, DB_TRX_ID must always refer to an existing undo log record. */ ut_ad(rec_get_trx_id(rec, cursor->index())); - /* Build an update vector containing all the fields to be modified; NOTE that this vector may NOT contain system columns trx_id or roll_ptr */ @@ -1010,6 +1095,13 @@ row_ins_foreign_check_on_constraint( mem_heap_t* tmp_heap = NULL; doc_id_t doc_id = FTS_NULL_DOC_ID; + TABLE* child_mysql_table = NULL; + byte* before_mysql_rec = NULL; + byte* after_mysql_rec = NULL; + bool need_cascade_binlog = false; + bool can_cascade_binlog = false; + bool have_after_image = false; + DBUG_ENTER("row_ins_foreign_check_on_constraint"); trx = thr_get_trx(thr); @@ -1344,15 +1436,214 @@ row_ins_foreign_check_on_constraint( cascade->state = UPD_NODE_UPDATE_CLUSTERED; + if (thd_rpl_use_binlog_events_for_fk_cascade(trx->mysql_thd)) { + + child_mysql_table = row_ins_find_open_table_for_cascade_binlog(trx, table); + if (child_mysql_table != NULL) { + handler* file = child_mysql_table->file; + const bool allow_rpl_fk_cascade_binlog= + row_ins_allow_fk_cascade_binlog_for_table(child_mysql_table); + const bool emulate_binlog= +#ifdef WITH_WSREP + wsrep_emulate_binlog(trx->mysql_thd); +#else + false; +#endif + if (child_mysql_table->in_use == trx->mysql_thd + && (emulate_binlog || + thd_is_current_stmt_binlog_format_row(trx->mysql_thd)) + && ((emulate_binlog) || + (thd_rpl_use_binlog_events_for_fk_cascade(trx->mysql_thd) && + allow_rpl_fk_cascade_binlog)) + && file->prepare_for_row_logging()) { + need_cascade_binlog = true; + before_mysql_rec = static_cast( + mem_heap_alloc(tmp_heap, child_mysql_table->s->reclength)); + if (cascade->is_delete != PLAIN_DELETE) { + after_mysql_rec = static_cast( + mem_heap_alloc(tmp_heap, child_mysql_table->s->reclength)); + } + } + } + + if (need_cascade_binlog) { + ha_innobase* ib = static_cast(child_mysql_table->file); + row_prebuilt_t* prebuilt = ib->innobase_prebuilt(); + if (prebuilt != NULL && prebuilt->mysql_template != NULL) { + MY_BITMAP* old_read_set = child_mysql_table->read_set; + MY_BITMAP* old_write_set = child_mysql_table->write_set; + MY_BITMAP* old_rpl_write_set = child_mysql_table->rpl_write_set; + child_mysql_table->column_bitmaps_set_no_signal( + &child_mysql_table->tmp_set, &child_mysql_table->tmp_set); + bitmap_set_all(&child_mysql_table->tmp_set); + if (Field **vf = child_mysql_table->vfield) { + for (; *vf; vf++) { + bitmap_clear_bit(&child_mysql_table->tmp_set, (*vf)->field_index); + } + } + if (child_mysql_table->rpl_write_set == NULL) { + child_mysql_table->rpl_write_set = &child_mysql_table->tmp_set; + } + ib->rebuild_template_for_cascade_binlog_row_image(); + + mtr_start(mtr); + if (cascade->pcur->restore_position(BTR_SEARCH_LEAF, mtr) + == btr_pcur_t::SAME_ALL) { + const rec_t* before_rec = btr_pcur_get_rec(cascade->pcur); + mem_heap_t* offs_heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs_init(offsets_); + const rec_offs* offsets = rec_get_offsets( + before_rec, clust_index, offsets_, clust_index->n_core_fields, + ULINT_UNDEFINED, &offs_heap); + dict_index_t* saved_index = prebuilt->index; + prebuilt->index = clust_index; + if (!row_sel_store_mysql_rec(before_mysql_rec, prebuilt, + before_rec, NULL, true, + clust_index, offsets)) { + need_cascade_binlog = false; + } + prebuilt->index = saved_index; + if (UNIV_LIKELY_NULL(offs_heap)) { + mem_heap_free(offs_heap); + } + } else { + need_cascade_binlog = false; + } + mtr_commit(mtr); + + child_mysql_table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + child_mysql_table->rpl_write_set = old_rpl_write_set; + ib->reset_template_for_cascade_binlog_row_image(); + } else { + need_cascade_binlog = false; + } + } + } err = row_update_cascade_for_mysql(thr, cascade, foreign->foreign_table); mtr_start(mtr); + can_cascade_binlog = (err == DB_SUCCESS && need_cascade_binlog); + have_after_image = false; + + if (can_cascade_binlog && cascade->is_delete != PLAIN_DELETE) { + if (cascade->pcur->restore_position(BTR_SEARCH_LEAF, mtr) + == btr_pcur_t::SAME_ALL) { + const rec_t* after_rec = btr_pcur_get_rec(cascade->pcur); + if (page_rec_is_user_rec(after_rec) + && !rec_get_deleted_flag(after_rec, + dict_table_is_comp(table))) { + ha_innobase* ib = static_cast(child_mysql_table->file); + row_prebuilt_t* prebuilt = ib->innobase_prebuilt(); + if (prebuilt != NULL && prebuilt->mysql_template != NULL) { + MY_BITMAP* old_read_set = child_mysql_table->read_set; + MY_BITMAP* old_write_set = child_mysql_table->write_set; + MY_BITMAP* old_rpl_write_set = child_mysql_table->rpl_write_set; + child_mysql_table->column_bitmaps_set_no_signal( + &child_mysql_table->tmp_set, &child_mysql_table->tmp_set); + bitmap_set_all(&child_mysql_table->tmp_set); + if (Field **vf = child_mysql_table->vfield) { + for (; *vf; vf++) { + bitmap_clear_bit(&child_mysql_table->tmp_set, (*vf)->field_index); + } + } + if (child_mysql_table->rpl_write_set == NULL) { + child_mysql_table->rpl_write_set = &child_mysql_table->tmp_set; + } + ib->rebuild_template_for_cascade_binlog_row_image(); + + mem_heap_t* offs_heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs_init(offsets_); + const rec_offs* offsets = rec_get_offsets( + after_rec, clust_index, offsets_, + clust_index->n_core_fields, + ULINT_UNDEFINED, &offs_heap); + dict_index_t* saved_index = prebuilt->index; + prebuilt->index = clust_index; + have_after_image = row_sel_store_mysql_rec(after_mysql_rec, prebuilt, + after_rec, NULL, true, + clust_index, offsets); + prebuilt->index = saved_index; + child_mysql_table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + child_mysql_table->rpl_write_set = old_rpl_write_set; + ib->reset_template_for_cascade_binlog_row_image(); + if (UNIV_LIKELY_NULL(offs_heap)) { + mem_heap_free(offs_heap); + } + } + } + } + } + /* Restore pcur position */ if (pcur->restore_position(BTR_SEARCH_LEAF, mtr) - != btr_pcur_t::SAME_ALL) { + != btr_pcur_t::SAME_ALL && err == DB_SUCCESS) { + err = DB_CORRUPTION; + } + + mtr_commit(mtr); + + if (can_cascade_binlog + && (cascade->is_delete == PLAIN_DELETE || have_after_image)) { + handler* file = child_mysql_table->file; + if (cascade->is_delete == PLAIN_DELETE) { + thd_binlog_mark_fk_cascade_events(trx->mysql_thd); + MY_BITMAP* old_read_set = child_mysql_table->read_set; + MY_BITMAP* old_write_set = child_mysql_table->write_set; + MY_BITMAP* old_rpl_write_set = child_mysql_table->rpl_write_set; + + child_mysql_table->column_bitmaps_set_no_signal( + &child_mysql_table->tmp_set, &child_mysql_table->tmp_set); + bitmap_set_all(&child_mysql_table->tmp_set); + if (Field **vf = child_mysql_table->vfield) { + for (; *vf; vf++) { + bitmap_clear_bit(&child_mysql_table->tmp_set, (*vf)->field_index); + } + } + if (child_mysql_table->rpl_write_set == NULL) { + child_mysql_table->rpl_write_set = &child_mysql_table->tmp_set; + } + Log_func* log_func = row_ins_fk_cascade_delete_binlog_row; + file->binlog_log_row(before_mysql_rec, NULL, log_func); + child_mysql_table->column_bitmaps_set_no_signal(old_read_set, old_write_set); + child_mysql_table->rpl_write_set = old_rpl_write_set; + } else { + thd_binlog_mark_fk_cascade_events(trx->mysql_thd); + const ulint len = child_mysql_table->s->reclength; + unsigned char* before_copy = static_cast( + my_malloc(PSI_INSTRUMENT_ME, len, MYF(MY_WME))); + unsigned char* after_copy = static_cast( + my_malloc(PSI_INSTRUMENT_ME, len, MYF(MY_WME))); + + if (before_copy != NULL && after_copy != NULL) { + memcpy(before_copy, before_mysql_rec, len); + memcpy(after_copy, after_mysql_rec, len); + + trx_cascade_binlog_row_event ev; + ev.table = child_mysql_table; + ev.before_record = before_copy; + ev.after_record = after_copy; + ev.log_func = reinterpret_cast( + row_ins_fk_cascade_update_binlog_row); + trx->pending_cascade_binlog_row_events.push_back(ev); + } else { + if (before_copy != NULL) { + my_free(before_copy); + } + if (after_copy != NULL) { + my_free(after_copy); + } + } + } + } + + mtr_start(mtr); + if (pcur->restore_position(BTR_SEARCH_LEAF, mtr) + != btr_pcur_t::SAME_ALL && err == DB_SUCCESS) { err = DB_CORRUPTION; } @@ -2562,7 +2853,7 @@ statement @return true if it is insert statement */ static bool thd_sql_is_insert(const THD *thd) noexcept { - switch (thd->lex->sql_command) { + switch (thd_sql_command(thd)) { case SQLCOM_INSERT: case SQLCOM_INSERT_SELECT: return true; diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 2fec0d025f9a4..cd531b6a6ce1e 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -3134,7 +3134,7 @@ be needed in the query. @retval true on success @retval false if not all columns could be retrieved */ MY_ATTRIBUTE((warn_unused_result)) -static bool row_sel_store_mysql_rec( +bool row_sel_store_mysql_rec( byte* mysql_rec, row_prebuilt_t* prebuilt, const rec_t* rec, diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index e0b1904175380..90142afc136ae 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -161,6 +161,21 @@ trx_init( #endif /* WITH_WSREP */ } +void trx_t::free_cascade_binlog_row_events() +{ + /* The before/after record images are allocated in + row_ins_foreign_check_on_constraint() and are normally freed when + ha_innobase::flush_pending_cascade_binlog() emits them at commit. + If the transaction rolls back, or the events are never flushed, + the buffers would otherwise leak, so free them here. + Note: my_free(NULL) is a no-op. */ + for (auto& ev : pending_cascade_binlog_row_events) { + my_free(ev.before_record); + my_free(ev.after_record); + } + pending_cascade_binlog_row_events.clear(); +} + /** For managing the life-cycle of the trx_t instance that we get from the pool. */ struct TrxFactory { @@ -179,6 +194,9 @@ struct TrxFactory { new(&trx->mod_tables) trx_mod_tables_t(); + new(&trx->pending_cascade_binlog_row_events) + std::vector(); + new(&trx->lock.table_locks) lock_list(); new(&trx->read_view) ReadView(); @@ -244,6 +262,9 @@ struct TrxFactory { trx->mod_tables.~trx_mod_tables_t(); + trx->free_cascade_binlog_row_events(); + trx->pending_cascade_binlog_row_events.~vector(); + ut_ad(!trx->read_view.is_open()); trx->lock.table_locks.~lock_list(); @@ -410,6 +431,8 @@ void trx_t::free() noexcept trx_sys.deregister_trx(this); check_unique_secondary= true; check_foreigns= true; + + free_cascade_binlog_row_events(); assert_freed(); trx_sys.rw_trx_hash.put_pins(this); mysql_thd= nullptr; @@ -930,7 +953,7 @@ trx_start_low( /* Check whether it is an AUTOCOMMIT SELECT */ if (const THD* thd = trx->mysql_thd) { trx->auto_commit = !(thd->variables.option_bits - & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) + & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) && thd->lex->sql_command == SQLCOM_SELECT; trx->read_only = (!trx->dict_operation && thd->tx_read_only) || srv_read_only_mode;