Skip to content

Commit 6fc9abc

Browse files
committed
MDEV-37662: Regression
The binary log could be corrupted when committing a large transaction (i.e. one whose data exceeds the binlog_cache_size limit and spills into a tmp file) in binlog_format=row if the server's --tmpdir is full. The corruption is that only the GTID event of the failed transaction is written to the binary log, without any body or finalizing events. This happened because the content of the transaction wasn't flushed at the proper time, so the transaction's binlog cache data was not yet durable when the server tried to copy the content from the binlog cache file into the binary log itself. While switching the tmp file from a WRITE_CACHE to a READ_CACHE, the server would see that there was still data to flush in the cache, and would first try to flush it. That is not a valid time to flush the data to the temporary file, though, because the GTID event has already been written directly to the binary log. So if this flush fails, it leaves the binary log in a corrupted state. The flush itself is expected to happen in THD::binlog_flush_pending_rows_event(). However, if there is no pending event, the flush is skipped.
1 parent 1ac2270 commit 6fc9abc

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#
2+
# This test ensures that a large transaction (i.e. one whose data exceeds the
3+
# binlog_cache_size limit and spills into a tmp file) that errors while
4+
# flushing the tmp file (e.g. due to no space on disk) does not corrupt the
5+
# binlog, and that the server/replication can continue working normally after
6+
# the error. To simulate the error, the test uses debug_dbug to inject a
7+
# failure in the my_write() function, which is called when flushing the tmp
8+
# file.
9+
#
10+
# References:
11+
# MDEV-37662: Binlog Corruption When tmpdir is Full
12+
#
13+
--source include/have_debug.inc
14+
--source include/have_innodb.inc
15+
--source include/have_binlog_format_row.inc
16+
--source include/master-slave.inc
17+
18+
--connection master
19+
set @old_binlog_cache_size= @@global.binlog_cache_size;
20+
set @@global.binlog_cache_size=4096;
21+
22+
--echo #
23+
--echo # Initialize test data
24+
--connection master
25+
create table t1 (a int, b longtext default NULL) engine=innodb;
26+
27+
--echo #
28+
--echo # Create transaction with cache data larger than the binlog_cache_size
29+
--echo # so it spills into a tmp file, then simulate ENOSPC while flushing
30+
--echo # the tmp file.
31+
--echo #
32+
set @@session.debug_dbug="+d,simulate_binlog_tmp_file_no_space_left_on_flush";
33+
--error 3
34+
insert into t1 values (2, repeat("y", 8192));
35+
set @@session.debug_dbug="";
36+
37+
--echo #
38+
--echo # Create another transaction to make sure the server/replication can
39+
--echo # continue working normally after the error
40+
--echo #
41+
insert into t1 values (3, repeat("z", 8192));
42+
--source include/save_master_gtid.inc
43+
--connection slave
44+
--source include/sync_with_master_gtid.inc
45+
46+
--let $diff_tables=master:test.t1,slave:test.t1
47+
--source include/diff_tables.inc
48+
49+
--echo #
50+
--echo # Cleanup
51+
--connection master
52+
drop table t1;
53+
--source include/save_master_gtid.inc
54+
55+
--connection slave
56+
--source include/sync_with_master_gtid.inc
57+
58+
--connection master
59+
set @@global.binlog_cache_size= @old_binlog_cache_size;
60+
61+
--source include/rpl_end.inc
62+
--echo # End of rpl_row_binlog_tmp_file_flush_enospc.test

mysys/my_write.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ size_t my_write(File Filedes, const uchar *Buffer, size_t Count, myf MyFlags)
5757
if (!errors) {
5858
errno= ENOSPC;
5959
writtenbytes= (size_t) -1;
60+
MyFlags&= ~MY_WAIT_IF_FULL;
6061
});
6162

6263
if (writtenbytes == Count)

sql/log.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7708,8 +7708,12 @@ int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
77087708
DBUG_ENTER("MYSQL_BIN_LOG::write_cache");
77097709

77107710
mysql_mutex_assert_owner(&LOCK_log);
7711+
DBUG_EXECUTE_IF("simulate_binlog_tmp_file_no_space_left_on_flush",
7712+
{ DBUG_SET("+d,simulate_file_write_error"); });
77117713
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
77127714
DBUG_RETURN(ER_ERROR_ON_WRITE);
7715+
DBUG_EXECUTE_IF("simulate_binlog_tmp_file_no_space_left_on_flush",
7716+
{ DBUG_SET("-d,simulate_file_write_error"); });
77137717
size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
77147718
size_t val;
77157719
size_t end_log_pos_inc= 0; // each event processed adds BINLOG_CHECKSUM_LEN 2 t

0 commit comments

Comments
 (0)