From 7e284b547a8b81f36e5f571d07e9721f192e994a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?=
Date: Fri, 6 Mar 2026 17:28:20 +0200
Subject: [PATCH 1/2] MDEV-38968 Redundant FILE_CHECKPOINT writes

Concurrent calls to log_checkpoint_low() were possible from multiple
threads, and they could cause redundant writes of FILE_CHECKPOINT
records. Let us simplify the logic by making the dedicated
buf_flush_page_cleaner() thread responsible for checkpoints.

log_t::write_checkpoint(lsn_t end_lsn): Add the parameter checkpoint,
which replaces log_sys.next_checkpoint_lsn.

log_sys.checkpoint_pending: Remove. Only the buf_flush_page_cleaner
thread will write checkpoints, hence there is no possibility of a
race condition.

log_checkpoint_low(), log_checkpoint(): Remove the return value,
because there cannot be any concurrent log checkpoint in progress.

buf_flush_wait(): Add special handling for
log_sys.check_for_checkpoint() as well as shutdown.

buf_flush_wait_flushed(): Assert that buf_flush_page_cleaner()
is available.

log_make_checkpoint(): Delegate all work to the page cleaner.

buf_flush_sync_for_checkpoint(): Update the systemd watchdog.
On shutdown, keep flushing until a checkpoint has been written.

buf_flush_page_cleaner(): Revise the shutdown logic so that all
changes will be written out.

buf_flush_buffer_pool(): Remove.

buf_flush_wait_flushed(): Require the caller to acquire
buf_pool.flush_list_mutex.

logs_empty_and_mark_files_at_shutdown(): Simplify the logic.

fil_names_clear(): Fix an off-by-one error that would prevent
removal from fil_system.named_spaces.
--- extra/mariabackup/xtrabackup.cc | 20 +- .../innodb/r/log_corruption_recovery.result | 2 +- .../innodb/t/log_corruption_recovery.test | 2 +- storage/innobase/buf/buf0dblwr.cc | 2 +- storage/innobase/buf/buf0flu.cc | 272 +++++++----------- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/fsp/fsp0sysspace.cc | 19 +- storage/innobase/include/buf0flu.h | 5 - storage/innobase/include/log0log.h | 17 +- storage/innobase/log/log0log.cc | 102 +++---- storage/innobase/log/log0recv.cc | 53 ++-- storage/innobase/srv/srv0start.cc | 1 + 12 files changed, 189 insertions(+), 308 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 9777eb6c3e504..d34c2a71cecb7 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -2658,7 +2658,7 @@ static void log_hdr_init() log_sys.format == log_t::FORMAT_ENC_11 ? log_t::FORMAT_ENC_11 : log_t::FORMAT_10_8); mach_write_to_8(LOG_HEADER_START_LSN + log_hdr_buf, - log_sys.next_checkpoint_lsn); + log_sys.last_checkpoint_lsn); snprintf(reinterpret_cast(LOG_HEADER_CREATOR + log_hdr_buf), 16, "Backup %u.%u.%u", MYSQL_VERSION_ID / 10000, MYSQL_VERSION_ID / 100 % 100, @@ -2666,7 +2666,7 @@ static void log_hdr_init() if (log_sys.is_encrypted()) log_crypt_write_header(log_hdr_buf + LOG_HEADER_CREATOR_END); mach_write_to_4(508 + log_hdr_buf, my_crc32c(0, log_hdr_buf, 508)); - mach_write_to_8(log_hdr_buf + 0x1000, log_sys.next_checkpoint_lsn); + mach_write_to_8(log_hdr_buf + 0x1000, log_sys.last_checkpoint_lsn); mach_write_to_8(log_hdr_buf + 0x1008, recv_sys.lsn); mach_write_to_4(log_hdr_buf + 0x103c, my_crc32c(0, log_hdr_buf + 0x1000, 60)); @@ -2740,7 +2740,7 @@ static bool innodb_init() return true; } - recv_sys.lsn= log_sys.next_checkpoint_lsn= + recv_sys.lsn= log_sys.last_checkpoint_lsn= log_get_lsn() - SIZE_OF_FILE_CHECKPOINT; log_sys.set_latest_format(false); // not encrypted log_hdr_init(); @@ -3613,7 +3613,7 @@ static bool backup_wait_timeout(lsn_t lsn, lsn_t 
last_lsn) return true; msg("Was only able to copy log from " LSN_PF " to " LSN_PF ", not " LSN_PF "; try increasing innodb_log_file_size", - log_sys.next_checkpoint_lsn, last_lsn, lsn); + log_sys.last_checkpoint_lsn.load(), last_lsn, lsn); return false; } @@ -4871,9 +4871,9 @@ static bool backup_wait_for_commit_lsn() /* read the latest checkpoint lsn */ if (recv_sys.find_checkpoint() == DB_SUCCESS && log_sys.is_latest()) { - if (log_sys.next_checkpoint_lsn > lsn) - lsn= log_sys.next_checkpoint_lsn; - metadata_to_lsn= log_sys.next_checkpoint_lsn; + metadata_to_lsn= log_sys.last_checkpoint_lsn; + if (metadata_to_lsn > lsn) + lsn= metadata_to_lsn; msg("mariabackup: The latest check point (for incremental): '" LSN_PF "'", metadata_to_lsn); } @@ -5576,7 +5576,7 @@ static bool xtrabackup_backup_func() } /* label it */ - recv_sys.file_checkpoint = log_sys.next_checkpoint_lsn; + recv_sys.file_checkpoint = log_sys.last_checkpoint_lsn; log_hdr_init(); /* Write log header*/ if (ds_write(dst_log_file, log_hdr_buf, 12288)) { @@ -5607,7 +5607,7 @@ static bool xtrabackup_backup_func() mysql_mutex_lock(&recv_sys.mutex); backup_log_parse = recv_sys.get_backup_parser(); - recv_sys.lsn = log_sys.next_checkpoint_lsn; + recv_sys.lsn = log_sys.last_checkpoint_lsn; const bool log_copy_failed = xtrabackup_copy_logfile(true); @@ -5666,7 +5666,7 @@ static bool xtrabackup_backup_func() backup_datasinks.destroy(); msg("Redo log (from LSN " LSN_PF " to " LSN_PF ") was copied.", - log_sys.next_checkpoint_lsn, recv_sys.lsn); + log_sys.last_checkpoint_lsn.load(), recv_sys.lsn); xb_filters_free(); xb_data_files_close(); diff --git a/mysql-test/suite/innodb/r/log_corruption_recovery.result b/mysql-test/suite/innodb/r/log_corruption_recovery.result index 7010a257090e8..986e396d965f4 100644 --- a/mysql-test/suite/innodb/r/log_corruption_recovery.result +++ b/mysql-test/suite/innodb/r/log_corruption_recovery.result @@ -5,7 +5,7 @@ call mtr.add_suppression("InnoDB: OPT_PAGE_CHECKSUM mismatch on \\[page 
id: spac call mtr.add_suppression("InnoDB: Set innodb_force_recovery=1"); call mtr.add_suppression("InnoDB: Cannot apply log to \\[page id: space=127, page number=0\\] of corrupted file '.*test/t\\.ibd"); call mtr.add_suppression("(InnoDB: Plugin|Plugin 'InnoDB')"); -call mtr.add_suppression("InnoDB: Page .* Current system log sequence number 12(3(38|54|70|86)|4(02|18|34|50|66|82|98)|5(14|30|46|62|78|94)|6(10|26|42|58|74|90))"); +call mtr.add_suppression("InnoDB: Page .* Current system log sequence number 123(38|54)"); SET GLOBAL innodb_fast_shutdown=0; # restart SELECT * FROM INFORMATION_SCHEMA.ENGINES diff --git a/mysql-test/suite/innodb/t/log_corruption_recovery.test b/mysql-test/suite/innodb/t/log_corruption_recovery.test index 12d1fb44cf43b..0a7603a5ee064 100644 --- a/mysql-test/suite/innodb/t/log_corruption_recovery.test +++ b/mysql-test/suite/innodb/t/log_corruption_recovery.test @@ -15,7 +15,7 @@ call mtr.add_suppression("InnoDB: Set innodb_force_recovery=1"); call mtr.add_suppression("InnoDB: Cannot apply log to \\[page id: space=127, page number=0\\] of corrupted file '.*test/t\\.ibd"); call mtr.add_suppression("(InnoDB: Plugin|Plugin 'InnoDB')"); # Allow innodb_force_recovery=1 to write up to 10 FILE_CHECKPOINT records -call mtr.add_suppression("InnoDB: Page .* Current system log sequence number 12(3(38|54|70|86)|4(02|18|34|50|66|82|98)|5(14|30|46|62|78|94)|6(10|26|42|58|74|90))"); +call mtr.add_suppression("InnoDB: Page .* Current system log sequence number 123(38|54)"); SET GLOBAL innodb_fast_shutdown=0; --source include/shutdown_mysqld.inc --move_file $DATADIR/ib_logfile0 $DATADIR/ib_logfile0.old diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index e31cf8bb0fe11..699c4de073a8b 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -316,7 +316,7 @@ dberr_t buf_dblwr_t::init_or_load_pages(pfs_os_file_t file, const char *path) else { alignas(8) char checkpoint[8]; - 
mach_write_to_8(checkpoint, log_sys.next_checkpoint_lsn); + mach_write_to_8(checkpoint, log_sys.last_checkpoint_lsn); for (auto i= size * 2; i--; page += srv_page_size) if (memcmp_aligned<8>(page + FIL_PAGE_LSN, checkpoint, 8) >= 0) /* Valid pages are not older than the log checkpoint. */ diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 4368220071042..17e068d8988fc 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1568,25 +1568,6 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) noexcept return count; } -/** Wait until a LRU flush batch ends. */ -void buf_flush_wait_LRU_batch_end() noexcept -{ - mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); - mysql_mutex_assert_not_owner(&buf_pool.mutex); - - if (buf_pool.n_flush()) - { - tpool::tpool_wait_begin(); - thd_wait_begin(nullptr, THD_WAIT_DISKIO); - do - my_cond_wait(&buf_pool.done_flush_LRU, - &buf_pool.flush_list_mutex.m_mutex); - while (buf_pool.n_flush()); - tpool::tpool_wait_end(); - thd_wait_end(nullptr); - } -} - /** Write out dirty blocks from buf_pool.flush_list. The caller must invoke buf_dblwr.flush_buffered_writes() after releasing buf_pool.mutex. @@ -1812,20 +1793,17 @@ static ulint buf_flush_LRU(ulint max_n) noexcept # include "cache.h" #endif -/** Write checkpoint information to the log header and release mutex. 
-@param end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */ -inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept + +inline void log_t::write_checkpoint(lsn_t checkpoint, lsn_t end_lsn) noexcept { ut_ad(!srv_read_only_mode); - ut_ad(end_lsn >= next_checkpoint_lsn); + ut_ad(end_lsn >= checkpoint); ut_d(const lsn_t current_lsn{get_lsn()}); ut_ad(end_lsn <= current_lsn); - ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT <= current_lsn || + ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT + + 8 * is_encrypted() <= current_lsn || srv_shutdown_state > SRV_SHUTDOWN_INITIATED); - DBUG_PRINT("ib_log", - ("checkpoint at " LSN_PF " written", next_checkpoint_lsn)); - auto n= next_checkpoint_no; const size_t offset{(n & 1) ? CHECKPOINT_2 : CHECKPOINT_1}; static_assert(CPU_LEVEL1_DCACHE_LINESIZE >= 64, "efficiency"); @@ -1833,7 +1811,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept byte* c= my_assume_aligned (is_mmap() ? buf + offset : checkpoint_buf); memset_aligned(c, 0, CPU_LEVEL1_DCACHE_LINESIZE); - mach_write_to_8(my_assume_aligned<8>(c), next_checkpoint_lsn); + mach_write_to_8(my_assume_aligned<8>(c), checkpoint); mach_write_to_8(my_assume_aligned<8>(c + 8), end_lsn); mach_write_to_4(my_assume_aligned<4>(c + 60), my_crc32c(0, c, 60)); @@ -1845,7 +1823,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept ut_ad(!is_opened()); resizing= resize_lsn.load(std::memory_order_relaxed); - if (resizing > 1 && resizing <= next_checkpoint_lsn) + if (resizing > 1 && resizing <= checkpoint) { memcpy_aligned<64>(resize_buf + CHECKPOINT_1, c, 64); header_write(resize_buf, resizing, is_encrypted()); @@ -1857,8 +1835,6 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept #endif { ut_ad(!is_mmap()); - ut_ad(!checkpoint_pending); - checkpoint_pending= true; latch.wr_unlock(); log_write_and_flush_prepare(); resizing= resize_lsn.load(std::memory_order_relaxed); @@ -1866,7 +1842,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept ut_ad(write_size 
>= 512); ut_ad(write_size <= 4096); log.write(offset, {c, write_size}); - if (resizing > 1 && resizing <= next_checkpoint_lsn) + if (resizing > 1 && resizing <= checkpoint) { resize_log.write(CHECKPOINT_1, {c, write_size}); byte *buf= static_cast(aligned_malloc(4096, 4096)); @@ -1879,30 +1855,26 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept if (srv_file_flush_method != SRV_O_DSYNC) ut_a(log.flush()); latch.wr_lock(SRW_LOCK_CALL); - ut_ad(checkpoint_pending); - checkpoint_pending= false; resizing= resize_lsn.load(std::memory_order_relaxed); } - ut_ad(!checkpoint_pending); next_checkpoint_no++; - const lsn_t checkpoint_lsn{next_checkpoint_lsn}; - last_checkpoint_lsn= checkpoint_lsn; + last_checkpoint_lsn= checkpoint; DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF ", flushed to " LSN_PF, - checkpoint_lsn, get_flushed_lsn())); + checkpoint, get_flushed_lsn())); if (overwrite_warned) { sql_print_information("InnoDB: Crash recovery was broken " "between LSN=" LSN_PF " and checkpoint LSN=" LSN_PF ".", - overwrite_warned, checkpoint_lsn); + overwrite_warned, checkpoint); overwrite_warned= 0; } lsn_t resizing_completed= 0; - if (resizing > 1 && resizing <= checkpoint_lsn) + if (resizing > 1 && resizing <= checkpoint) { ut_ad(is_mmap() == !resize_flush_buf); ut_ad(is_mmap() == !resize_log.is_opened()); @@ -1979,7 +1951,7 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept log_resize_release(); if (UNIV_LIKELY(resizing <= 1)); - else if (resizing > checkpoint_lsn) + else if (resizing > checkpoint) buf_flush_ahead(resizing, false); else if (resizing_completed) ib::info() << "Resized log to " << ib::bytes_iec{resizing_completed} @@ -1990,9 +1962,8 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept /** Initiate a log checkpoint, discarding the start of the log. 
@param oldest_lsn the checkpoint LSN -@param end_lsn log_sys.get_lsn() -@return true if success, false if a checkpoint write was already running */ -static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept +@param end_lsn log_sys.get_lsn() */ +static void log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept { ut_ad(!srv_read_only_mode); ut_ad(log_sys.latch_have_wr()); @@ -2003,14 +1974,13 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept (oldest_lsn == end_lsn && !log_sys.resize_in_progress() && oldest_lsn == log_sys.last_checkpoint_lsn + - (log_sys.is_encrypted() - ? SIZE_OF_FILE_CHECKPOINT + 8 : SIZE_OF_FILE_CHECKPOINT))) + log_sys.is_encrypted() * 8 + SIZE_OF_FILE_CHECKPOINT)) { /* Do nothing, because nothing was logged (other than a FILE_CHECKPOINT record) since the previous checkpoint. */ do_nothing: log_sys.latch.wr_unlock(); - return true; + return; } ut_ad(!recv_no_log_write); @@ -2027,7 +1997,8 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept mtr_t::commit() in other threads will be blocked, and no pages can be added to buf_pool.flush_list. */ const lsn_t flush_lsn{fil_names_clear(oldest_lsn)}; - ut_ad(flush_lsn >= end_lsn + SIZE_OF_FILE_CHECKPOINT); + ut_ad(flush_lsn >= end_lsn + + SIZE_OF_FILE_CHECKPOINT + 8 * log_sys.is_encrypted()); log_sys.latch.wr_unlock(); log_write_up_to(flush_lsn, true); log_sys.latch.wr_lock(SRW_LOCK_CALL); @@ -2036,26 +2007,14 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) noexcept ut_ad(log_sys.get_flushed_lsn() >= flush_lsn); - if (log_sys.checkpoint_pending) - { - /* A checkpoint write is running */ - log_sys.latch.wr_unlock(); - return false; - } - - log_sys.next_checkpoint_lsn= oldest_lsn; - log_sys.write_checkpoint(end_lsn); - - return true; + log_sys.write_checkpoint(oldest_lsn, end_lsn); } /** Make a checkpoint. 
Note that this function does not flush dirty blocks from the buffer pool: it only checks what is lsn of the oldest modification in the pool, and writes information about the lsn in -log file. Use log_make_checkpoint() to flush also the pool. -@retval true if the checkpoint was or had been made -@retval false if a checkpoint write was already running */ -static bool log_checkpoint() noexcept +log file. Use log_make_checkpoint() to flush also the pool. */ +static void log_checkpoint() noexcept { ut_ad(!recv_recovery_is_on()); @@ -2080,14 +2039,7 @@ static bool log_checkpoint() noexcept mysql_mutex_lock(&buf_pool.flush_list_mutex); const lsn_t oldest_lsn= buf_pool.get_oldest_modification(end_lsn); mysql_mutex_unlock(&buf_pool.flush_list_mutex); - return log_checkpoint_low(oldest_lsn, end_lsn); -} - -/** Make a checkpoint. */ -ATTRIBUTE_COLD void log_make_checkpoint() noexcept -{ - buf_flush_wait_flushed(log_get_lsn()); - while (!log_checkpoint()); + log_checkpoint_low(oldest_lsn, end_lsn); } /** Wait for all dirty pages up to an LSN to be written out. @@ -2095,12 +2047,23 @@ NOTE: The calling thread is not allowed to hold any buffer page latches! 
*/ static void buf_flush_wait(lsn_t lsn) noexcept { lsn_t oldest_lsn; + if (log_sys.check_for_checkpoint()) + { + if (buf_flush_sync_lsn < lsn) + goto set_target; + goto wake; + } - while ((oldest_lsn= buf_pool.get_oldest_modification(lsn)) < lsn) + while ((oldest_lsn= buf_pool.get_oldest_modification(lsn)) < lsn && + (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP || + log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT + + 8 * log_sys.is_encrypted() < lsn)) { if (buf_flush_sync_lsn < lsn) { + set_target: buf_flush_sync_lsn= lsn; + wake: buf_pool.page_cleaner_set_idle(false); pthread_cond_signal(&buf_pool.do_flush_list); my_cond_wait(&buf_pool.done_flush_list, @@ -2112,6 +2075,9 @@ static void buf_flush_wait(lsn_t lsn) noexcept mysql_mutex_unlock(&buf_pool.flush_list_mutex); os_aio_wait_until_no_pending_writes(false); mysql_mutex_lock(&buf_pool.flush_list_mutex); + if (buf_flush_sync_lsn && srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) + my_cond_wait(&buf_pool.done_flush_list, + &buf_pool.flush_list_mutex.m_mutex); } if (oldest_lsn >= buf_flush_sync_lsn) @@ -2125,57 +2091,32 @@ static void buf_flush_wait(lsn_t lsn) noexcept @param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) noexcept { - ut_ad(sync_lsn); - ut_ad(sync_lsn < LSN_MAX); - ut_ad(!srv_read_only_mode); - - mysql_mutex_lock(&buf_pool.flush_list_mutex); + mysql_mutex_assert_owner(&buf_pool.flush_list_mutex); + ut_ad(buf_page_cleaner_is_active); - if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn) + if (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn || + log_sys.check_for_checkpoint()) { MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS); -#if 1 /* FIXME: remove this, and guarantee that the page cleaner serves us */ - if (UNIV_UNLIKELY(!buf_page_cleaner_is_active)) - { - do - { - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - ulint n_pages= buf_flush_list(srv_max_io_capacity, sync_lsn); - if (n_pages) - { - 
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE, - MONITOR_FLUSH_SYNC_COUNT, - MONITOR_FLUSH_SYNC_PAGES, n_pages); - } - os_aio_wait_until_no_pending_writes(false); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - } - while (buf_pool.get_oldest_modification(sync_lsn) < sync_lsn); - } - else -#endif - { - thd_wait_begin(nullptr, THD_WAIT_DISKIO); - tpool::tpool_wait_begin(); - buf_flush_wait(sync_lsn); - tpool::tpool_wait_end(); - thd_wait_end(nullptr); - } + thd_wait_begin(nullptr, THD_WAIT_DISKIO); + tpool::tpool_wait_begin(); + buf_flush_wait(sync_lsn); + tpool::tpool_wait_end(); + thd_wait_end(nullptr); } +} +/** Make a checkpoint. */ +ATTRIBUTE_COLD void log_make_checkpoint() noexcept +{ + log_sys.latch.wr_lock(SRW_LOCK_CALL); + mysql_mutex_lock(&buf_pool.flush_list_mutex); + const lsn_t sync_lsn{log_sys.get_lsn()}; + log_sys.set_check_for_checkpoint(); + log_sys.latch.wr_unlock(); + buf_flush_wait_flushed(sync_lsn); mysql_mutex_unlock(&buf_pool.flush_list_mutex); - - if (UNIV_UNLIKELY(log_sys.last_checkpoint_lsn < sync_lsn)) - { - /* If the buffer pool was clean, no log write was guaranteed - to happen until now. There could be an outstanding FILE_CHECKPOINT - record from a previous fil_names_clear() call, which we must - write out before we can advance the checkpoint. */ - log_write_up_to(sync_lsn, true); - DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", return;); - log_checkpoint(); - } } /** Initiate more eager page flushing if the log checkpoint age is too old. 
@@ -2259,6 +2200,12 @@ static void buf_flush_sync_for_checkpoint(lsn_t lsn) noexcept mysql_mutex_unlock(&buf_pool.flush_list_mutex); } + if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) + { + service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, + "Waiting to flush the buffer pool"); + } + if (ulint n_flushed= buf_flush_list(srv_max_io_capacity, lsn)) { MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE, @@ -2283,28 +2230,31 @@ static void buf_flush_sync_for_checkpoint(lsn_t lsn) noexcept const lsn_t checkpoint_lsn= measure ? measure : newest_lsn; if (!recv_recovery_is_on() && - checkpoint_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT) + checkpoint_lsn > log_sys.last_checkpoint_lsn + + SIZE_OF_FILE_CHECKPOINT + 8 * log_sys.is_encrypted()) { mysql_mutex_unlock(&buf_pool.flush_list_mutex); log_checkpoint_low(checkpoint_lsn, newest_lsn); + log_sys.latch.wr_lock(SRW_LOCK_CALL); mysql_mutex_lock(&buf_pool.flush_list_mutex); - measure= buf_pool.get_oldest_modification(LSN_MAX); + measure= buf_pool.get_oldest_modification(0); } + + if (measure); + else if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP || + log_sys.get_lsn() <= log_sys.last_checkpoint_lsn + + SIZE_OF_FILE_CHECKPOINT + 8 * log_sys.is_encrypted()) + measure= LSN_MAX; else - { - log_sys.latch.wr_unlock(); - if (!measure) - measure= LSN_MAX; - } + buf_flush_sync_lsn= newest_lsn; /* After attempting log checkpoint, check if we have reached our target. 
*/ - const lsn_t target= buf_flush_sync_lsn; - - if (measure >= target) + if (measure >= buf_flush_sync_lsn) buf_flush_sync_lsn= 0; else if (measure >= buf_flush_async_lsn) buf_flush_async_lsn= 0; + log_sys.latch.wr_unlock(); /* wake up buf_flush_wait() */ pthread_cond_broadcast(&buf_pool.done_flush_list); mysql_mutex_unlock(&buf_pool.flush_list_mutex); @@ -2569,7 +2519,7 @@ static void buf_flush_page_cleaner() noexcept mysql_mutex_lock(&buf_pool.flush_list_mutex); if (!buf_pool.need_LRU_eviction()) { - if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) break; if (buf_pool.page_cleaner_idle() && @@ -2599,23 +2549,30 @@ static void buf_flush_page_cleaner() noexcept set_almost_idle: pthread_cond_broadcast(&buf_pool.done_flush_LRU); pthread_cond_broadcast(&buf_pool.done_flush_list); - if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) - break; mysql_mutex_unlock(&buf_pool.flush_list_mutex); buf_dblwr.flush_buffered_writes(); do { - IF_DBUG(if (_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) || - _db_keyword_(nullptr, "ib_log_checkpoint_avoid_hard", 1)) + if (recv_recovery_is_on()) + continue; + IF_DBUG(if (log_sys.last_checkpoint_lsn && + (_db_keyword_(nullptr, "ib_log_checkpoint_avoid", 1) || + _db_keyword_(nullptr, "ib_log_checkpoint_avoid_hard", 1))) continue,); - if (!recv_recovery_is_on() && - !srv_startup_is_before_trx_rollback_phase && - srv_operation <= SRV_OPERATION_EXPORT_RESTORED) + if (log_sys.check_for_checkpoint() || + (!srv_startup_is_before_trx_rollback_phase && + srv_operation <= SRV_OPERATION_EXPORT_RESTORED)) log_checkpoint(); } while (false); + if (UNIV_UNLIKELY(srv_shutdown_state >= SRV_SHUTDOWN_LAST_PHASE)) + { + mysql_mutex_lock(&buf_pool.flush_list_mutex); + break; + } + if (!buf_pool.need_LRU_eviction()) continue; mysql_mutex_lock(&buf_pool.flush_list_mutex); @@ -2668,7 +2625,7 @@ static void buf_flush_page_cleaner() noexcept buf_pool.page_cleaner_set_idle(false); goto 
set_almost_idle; } - else if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) + else if (UNIV_UNLIKELY(srv_shutdown_state >= SRV_SHUTDOWN_LAST_PHASE)) break; const ulint dirty_blocks= UT_LIST_GET_LEN(buf_pool.flush_list); @@ -2769,18 +2726,7 @@ static void buf_flush_page_cleaner() noexcept goto LRU_flush; } - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - - if (srv_fast_shutdown != 2) - { - buf_dblwr.flush_buffered_writes(); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - buf_flush_wait_LRU_batch_end(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - os_aio_wait_until_no_pending_writes(false); - } - - mysql_mutex_lock(&buf_pool.flush_list_mutex); + ut_ad(!buf_pool.n_flush()); lsn_limit= buf_flush_sync_lsn; if (UNIV_UNLIKELY(lsn_limit != 0)) { @@ -2825,39 +2771,15 @@ ATTRIBUTE_COLD void buf_flush_page_cleaner_init() noexcept std::thread(buf_flush_page_cleaner).detach(); } -/** Flush the buffer pool on shutdown. */ -ATTRIBUTE_COLD void buf_flush_buffer_pool() noexcept -{ - ut_ad(!buf_page_cleaner_is_active); - ut_ad(!buf_flush_sync_lsn); - - service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, - "Waiting to flush the buffer pool"); - os_aio_wait_until_no_pending_reads(false); - - mysql_mutex_lock(&buf_pool.flush_list_mutex); - - while (buf_pool.get_oldest_modification(0)) - { - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - buf_flush_list(srv_max_io_capacity); - os_aio_wait_until_no_pending_writes(false); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, - "Waiting to flush " ULINTPF " pages", - UT_LIST_GET_LEN(buf_pool.flush_list)); - } - - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - ut_ad(!os_aio_pending_reads()); -} - /** Synchronously flush dirty blocks during recv_sys_t::apply(). NOTE: The calling thread is not allowed to hold any buffer page latches! 
*/ ATTRIBUTE_COLD void buf_flush_sync_batch(lsn_t lsn) noexcept { - lsn= std::max(lsn, log_get_lsn()); + log_sys.latch.wr_lock(SRW_LOCK_CALL); mysql_mutex_lock(&buf_pool.flush_list_mutex); + lsn= std::max(lsn, log_sys.get_lsn()); + ut_ad(buf_page_cleaner_is_active); + log_sys.latch.wr_unlock(); buf_flush_wait(lsn); mysql_mutex_unlock(&buf_pool.flush_list_mutex); } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 6d2e67c427a36..6001ed2e1d76d 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -3040,7 +3040,7 @@ ATTRIBUTE_COLD lsn_t fil_names_clear(lsn_t lsn) noexcept auto next = std::next(it); ut_ad(it->max_lsn > 0); - if (it->max_lsn < lsn) { + if (it->max_lsn <= lsn) { /* The tablespace was last dirtied before the checkpoint LSN. Remove it from the list, so that if the tablespace is not going to be diff --git a/storage/innobase/fsp/fsp0sysspace.cc b/storage/innobase/fsp/fsp0sysspace.cc index bc5d2aec49eb3..84708a4268e2d 100644 --- a/storage/innobase/fsp/fsp0sysspace.cc +++ b/storage/innobase/fsp/fsp0sysspace.cc @@ -613,16 +613,14 @@ inline dberr_t SysTablespace::read_lsn_and_check_flags() return err; } - if (srv_force_recovery != 6 + if (!log_sys.file_size && log_sys.format == log_t::FORMAT_3_23 && srv_operation == SRV_OPERATION_NORMAL - && !log_sys.next_checkpoint_lsn - && log_sys.format == log_t::FORMAT_3_23) { - + && srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { log_sys.latch.wr_lock(SRW_LOCK_CALL); - /* Prepare for possible upgrade from 0-sized ib_logfile0. */ - log_sys.next_checkpoint_lsn = mach_read_from_8( + /* Upgrade from 0-sized ib_logfile0. */ + log_sys.last_checkpoint_lsn = mach_read_from_8( first_page + 26/*FIL_PAGE_FILE_FLUSH_LSN*/); - if (log_sys.next_checkpoint_lsn < 8204) { + if (log_sys.last_checkpoint_lsn < 8204) { /* Before MDEV-14425, InnoDB had a minimum LSN of 8192+12=8204. 
Likewise, mariadb-backup --prepare would create an empty ib_logfile0 @@ -632,10 +630,9 @@ inline dberr_t SysTablespace::read_lsn_and_check_flags() "empty, and LSN is unknown."); err = DB_CORRUPTION; } else { - log_sys.last_checkpoint_lsn = - recv_sys.lsn = recv_sys.file_checkpoint = - log_sys.next_checkpoint_lsn; - log_sys.set_recovered_lsn(log_sys.next_checkpoint_lsn); + recv_sys.lsn = recv_sys.file_checkpoint = + log_sys.last_checkpoint_lsn; + log_sys.set_recovered_lsn(recv_sys.lsn); log_sys.next_checkpoint_no = 0; } diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 791792bc0983c..ed2f2e3ec29f1 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -81,8 +81,6 @@ bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr) noexcept MY_ATTRIBUTE((warn_unused_result)); -/** Wait until a LRU flush batch ends. */ -void buf_flush_wait_LRU_batch_end() noexcept; /** Wait until all persistent pages are flushed up to a limit. @param sync_lsn buf_pool.get_oldest_modification(LSN_MAX) to wait for */ ATTRIBUTE_COLD void buf_flush_wait_flushed(lsn_t sync_lsn) noexcept; @@ -94,9 +92,6 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious) noexcept; /** Initialize page_cleaner. */ ATTRIBUTE_COLD void buf_flush_page_cleaner_init() noexcept; -/** Flush the buffer pool on shutdown. */ -ATTRIBUTE_COLD void buf_flush_buffer_pool() noexcept; - #ifdef UNIV_DEBUG /** Validate the flush list. */ void buf_flush_validate() noexcept; diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index b728ac8d823bd..5dce7e261633b 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -186,6 +186,11 @@ struct log_t public: /** innodb_log_buffer_size (usable append_prepare() size in bytes) */ unsigned buf_size; + /** set when there may be need to initiate a log checkpoint. 
+ This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */ + std::atomic need_checkpoint; + /** next checkpoint number (protected by latch.wr_lock()) */ + byte next_checkpoint_no; /** log file size in bytes, including the header */ lsn_t file_size; @@ -230,13 +235,6 @@ struct log_t In write_buf(), buf and flush_buf may be swapped */ byte *flush_buf; - /** set when there may be need to initiate a log checkpoint. - This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */ - std::atomic need_checkpoint; - /** whether a checkpoint is pending; protected by latch.wr_lock() */ - Atomic_relaxed checkpoint_pending; - /** next checkpoint number (protected by latch.wr_lock()) */ - byte next_checkpoint_no; /** Log sequence number when a log file overwrite (broken crash recovery) was noticed. Protected by latch.wr_lock(). */ lsn_t overwrite_warned; @@ -245,8 +243,6 @@ struct log_t Atomic_relaxed last_checkpoint_lsn; /** The log writer (protected by latch.wr_lock()) */ lsn_t (*writer)() noexcept; - /** next checkpoint LSN (protected by latch.wr_lock()) */ - lsn_t next_checkpoint_lsn; /** Log file */ log_file_t log; @@ -541,8 +537,9 @@ struct log_t } /** Write checkpoint information and invoke latch.wr_unlock(). 
+ @param checkpoint the new checkpoint LSN @param end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */ - inline void write_checkpoint(lsn_t end_lsn) noexcept; + inline void write_checkpoint(lsn_t checkpoint, lsn_t end_lsn) noexcept; /** Variations of write_buf() */ enum resizing_and_latch { diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 0d5ad5b57eca9..19b447496b33e 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -97,7 +97,7 @@ void log_t::create() noexcept /* LSN 0 and 1 are reserved; @see buf_page_t::oldest_modification_ */ base_lsn.store(FIRST_LSN, std::memory_order_relaxed); flushed_to_disk_lsn.store(FIRST_LSN, std::memory_order_relaxed); - need_checkpoint.store(true, std::memory_order_relaxed); + need_checkpoint.store(false, std::memory_order_relaxed); write_lsn= FIRST_LSN; ut_ad(!checkpoint_buf); @@ -113,8 +113,6 @@ void log_t::create() noexcept log_capacity= 0; max_modified_age_async= 0; max_checkpoint_age= 0; - next_checkpoint_lsn= 0; - checkpoint_pending= false; ut_ad(is_initialised()); } @@ -1325,10 +1323,12 @@ ATTRIBUTE_COLD static void log_checkpoint_margin() noexcept } DBUG_EXECUTE_IF("ib_log_checkpoint_avoid_hard", goto skip_checkpoint;); + mysql_mutex_lock(&buf_pool.flush_list_mutex); log_sys.latch.wr_unlock(); /* We must wait to prevent the tail of the log overwriting the head. */ buf_flush_wait_flushed(lsn - max_age); + mysql_mutex_unlock(&buf_pool.flush_list_mutex); /* Sleep to avoid a thundering herd */ std::this_thread::sleep_for(std::chrono::milliseconds(10)); } @@ -1356,10 +1356,10 @@ inline void buf_mem_pressure_shutdown() noexcept {} /** Make a checkpoint at the latest lsn on shutdown. 
*/ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept { - lsn_t lsn; ulint count = 0; ib::info() << "Starting shutdown..."; + ut_ad(buf_pool.is_initialised() || !srv_was_started); /* Wait until the master thread and all other operations are idle: our algorithm only works if the server is idle at shutdown */ @@ -1385,18 +1385,17 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept srv_shutdown(srv_fast_shutdown == 0); } + constexpr ulint COUNT_INTERVAL{600}; + if (false) { + loop: + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + count++; + } -loop: ut_ad(lock_sys.is_initialised() || !srv_was_started); ut_ad(log_sys.is_initialised() || !srv_was_started); ut_ad(fil_system.is_initialised() || !srv_was_started); -#define COUNT_INTERVAL 600U -#define CHECK_INTERVAL 100000U - std::this_thread::sleep_for(std::chrono::microseconds(CHECK_INTERVAL)); - - count++; - /* Check that there are no longer transactions, except for PREPARED ones. We need this wait even for the 'very fast' shutdown, because the InnoDB layer may have committed or @@ -1408,12 +1407,12 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept if (srv_print_verbose_log && count > COUNT_INTERVAL) { service_manager_extend_timeout( - COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2, - "Waiting for %lu active transactions to finish", - (ulong) total_trx); - ib::info() << "Waiting for " << total_trx << " active" - << " transactions to finish"; - + COUNT_INTERVAL / 5, + "Waiting for %zu active transactions to finish", + total_trx); + sql_print_information("InnoDB: Waiting for %zu active" + " transactions to finish", + total_trx); count = 0; } @@ -1429,7 +1428,7 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept ut_ad(!srv_read_only_mode); wait_suspend_loop: service_manager_extend_timeout( - COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2, + COUNT_INTERVAL / 5, "Waiting for %s to exit", thread_name); if (srv_print_verbose_log && count 
> COUNT_INTERVAL) { ib::info() << "Waiting for " << thread_name @@ -1448,66 +1447,37 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() noexcept goto wait_suspend_loop; } - if (buf_page_cleaner_is_active) { - thread_name = "page cleaner thread"; - pthread_cond_signal(&buf_pool.do_flush_list); - goto wait_suspend_loop; - } + if (buf_pool.is_initialised()) { + if (srv_fast_shutdown != 2 && !srv_read_only_mode + && srv_was_started) { + log_sys.set_check_for_checkpoint(); + buf_flush_sync_batch(0); + } - buf_load_dump_end(); + buf_load_dump_end(); - if (!buf_pool.is_initialised()) { - ut_ad(!srv_was_started); - } else { - buf_flush_buffer_pool(); - } + mysql_mutex_lock(&buf_pool.flush_list_mutex); + srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; + while (buf_page_cleaner_is_active) { + pthread_cond_signal(&buf_pool.do_flush_list); + my_cond_wait(&buf_pool.done_flush_list, + &buf_pool.flush_list_mutex.m_mutex); + } + mysql_mutex_unlock(&buf_pool.flush_list_mutex); - if (srv_fast_shutdown == 2 || !srv_was_started) { - if (!srv_read_only_mode && srv_was_started) { + if (srv_fast_shutdown == 2 && !srv_read_only_mode) { sql_print_information( "InnoDB: Executing innodb_fast_shutdown=2." " Next startup will execute crash recovery!"); - - /* In this fastest shutdown we do not flush the - buffer pool: - - it is essentially a 'crash' of the InnoDB server. - Make sure that the log is all flushed to disk, so - that we can recover all committed transactions in - a crash recovery. */ log_buffer_flush_to_disk(); } - - srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; - return; } - if (!srv_read_only_mode) { - service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL, - "ensuring dirty buffer pool are written to log"); - log_make_checkpoint(); - - const auto sizeof_cp = log_sys.is_encrypted() - ? 
SIZE_OF_FILE_CHECKPOINT + 8 - : SIZE_OF_FILE_CHECKPOINT; - - log_sys.latch.wr_lock(SRW_LOCK_CALL); - - lsn = log_sys.get_lsn(); - - const bool lsn_changed = lsn != log_sys.last_checkpoint_lsn - && lsn != log_sys.last_checkpoint_lsn + sizeof_cp; - ut_ad(lsn >= log_sys.last_checkpoint_lsn); - - log_sys.latch.wr_unlock(); - - if (lsn_changed) { - goto loop; - } - } else { - lsn = recv_sys.lsn; + if (srv_fast_shutdown == 2 || !srv_was_started) { + return; } + const lsn_t lsn{log_get_lsn()}; srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; /* Make some checks that the server really is quiet */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index ccbb8faf4c727..b20242fb8ef7b 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1688,7 +1688,7 @@ ATTRIBUTE_COLD static dberr_t recv_log_recover_pre_10_2() if (o >= 0x80c && (o & ~511) + 512 < log_size) { max_no= checkpoint_no; - log_sys.next_checkpoint_lsn= mach_read_from_8(buf + CHECKPOINT_LSN); + log_sys.last_checkpoint_lsn= mach_read_from_8(buf + CHECKPOINT_LSN); source_offset= o; } } @@ -1697,7 +1697,7 @@ ATTRIBUTE_COLD static dberr_t recv_log_recover_pre_10_2() ? "InnoDB: Upgrade after a crash is not supported." 
: "mariadb-backup --prepare is not possible."; - if (!log_sys.next_checkpoint_lsn) + if (!log_sys.last_checkpoint_lsn) { sql_print_error("%s" " This redo log was created before MariaDB 10.2.2," @@ -1720,7 +1720,7 @@ ATTRIBUTE_COLD static dberr_t recv_log_recover_pre_10_2() if (log_block_calc_checksum_format_0(buf) != mach_read_from_4(my_assume_aligned<4>(buf + 508)) && - !log_crypt_101_read_block(buf, log_sys.next_checkpoint_lsn)) + !log_crypt_101_read_block(buf, log_sys.last_checkpoint_lsn)) { sql_print_error("%s%s, and it appears corrupted.", uag, pre_10_2); return DB_CORRUPTION; @@ -1767,7 +1767,7 @@ static dberr_t recv_log_recover_10_5(lsn_t lsn_offset) } if (log_sys.is_encrypted() && - !log_decrypt(buf, log_sys.next_checkpoint_lsn & ~511, 512)) + !log_decrypt(buf, log_sys.last_checkpoint_lsn & ~511, 512)) return DB_ERROR; /* On a clean shutdown, the redo log will be logically empty @@ -1842,14 +1842,14 @@ dberr_t recv_sys_t::find_checkpoint() { if (wrong_size) return DB_CORRUPTION; - lsn= log_sys.next_checkpoint_lsn; + lsn= log_sys.last_checkpoint_lsn; log_sys.format= log_t::FORMAT_3_23; goto upgrade; } } else ut_ad(srv_operation == SRV_OPERATION_BACKUP); - log_sys.next_checkpoint_lsn= 0; + log_sys.last_checkpoint_lsn= 0; lsn= 0; buf= my_assume_aligned<4096>(log_sys.buf); if (!log_sys.is_mmap()) @@ -1867,9 +1867,8 @@ dberr_t recv_sys_t::find_checkpoint() upgrade: memset_aligned<4096>(const_cast(field_ref_zero), 0, 4096); /* Mark the redo log for upgrading. 
*/ - log_sys.last_checkpoint_lsn= log_sys.next_checkpoint_lsn; - log_sys.set_recovered_lsn(log_sys.next_checkpoint_lsn); - lsn= file_checkpoint= log_sys.next_checkpoint_lsn; + lsn= file_checkpoint= log_sys.last_checkpoint_lsn; + log_sys.set_recovered_lsn(lsn); if (UNIV_LIKELY(lsn != 0)) scanned_lsn= lsn; log_sys.next_checkpoint_no= 0; @@ -1937,14 +1936,14 @@ dberr_t recv_sys_t::find_checkpoint() continue; } - if (checkpoint_lsn >= log_sys.next_checkpoint_lsn) + if (checkpoint_lsn >= log_sys.last_checkpoint_lsn) { - log_sys.next_checkpoint_lsn= checkpoint_lsn; + log_sys.last_checkpoint_lsn= checkpoint_lsn; log_sys.next_checkpoint_no= field == log_t::CHECKPOINT_1; lsn= end_lsn; } } - if (!log_sys.next_checkpoint_lsn) + if (!log_sys.last_checkpoint_lsn) goto got_no_checkpoint; if (!memcmp(creator, "Backup ", 7)) srv_start_after_restore= true; @@ -1997,14 +1996,14 @@ dberr_t recv_sys_t::find_checkpoint() if (checkpoint_no >= max_no && o >= 0x80c && (o & ~511) + 512 < log_size) { max_no= checkpoint_no; - log_sys.next_checkpoint_lsn= checkpoint_lsn; + log_sys.last_checkpoint_lsn= checkpoint_lsn; log_sys.next_checkpoint_no= field == 512; lsn_offset= mach_read_from_8(b + 16); } } } - if (!log_sys.next_checkpoint_lsn) + if (!log_sys.last_checkpoint_lsn) { got_no_checkpoint: sql_print_error("InnoDB: No valid checkpoint was found;" @@ -2565,7 +2564,7 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse(source l, bool if_exists) (srv_operation == SRV_OPERATION_BACKUP || srv_operation == SRV_OPERATION_BACKUP_NO_DEFER)); mysql_mutex_assert_owner(&mutex); - ut_ad(log_sys.next_checkpoint_lsn); + ut_ad(log_sys.last_checkpoint_lsn); ut_ad(log_sys.is_recoverable()); ut_ad(log_sys.format == format); @@ -2818,7 +2817,7 @@ log_parse_file(const page_id_t id, bool if_exists, { if (UNIV_UNLIKELY(srv_print_verbose_log == 2)) fprintf(stderr, "FILE_CHECKPOINT(" LSN_PF ") %s at " LSN_PF "\n", - c, c != log_sys.next_checkpoint_lsn + c, c != log_sys.last_checkpoint_lsn ? 
"ignored" : recv_sys.file_checkpoint ? "reread" : "read", recv_sys.lsn); @@ -2828,7 +2827,7 @@ log_parse_file(const page_id_t id, bool if_exists, ? "ignored" : recv_sys.file_checkpoint ? "reread" : "read", recv_sys.lsn)); - if (c == log_sys.next_checkpoint_lsn) + if (c == log_sys.last_checkpoint_lsn) { /* There can be multiple FILE_CHECKPOINT for the same LSN. */ if (!recv_sys.file_checkpoint) @@ -4340,7 +4339,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) if (UNIV_UNLIKELY(!recv_needed_recovery)) { ut_ad(!last_phase); - ut_ad(recv_sys.lsn >= log_sys.next_checkpoint_lsn); + ut_ad(recv_sys.lsn >= log_sys.last_checkpoint_lsn); if (!store) { @@ -4368,7 +4367,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) { recv_sys.set_corrupt_log(); sql_print_error("InnoDB: Missing FILE_CHECKPOINT(" LSN_PF - ") at " LSN_PF, log_sys.next_checkpoint_lsn, + ") at " LSN_PF, log_sys.last_checkpoint_lsn, recv_sys.lsn); } mysql_mutex_unlock(&recv_sys.mutex); @@ -4393,7 +4392,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) } sql_print_information("InnoDB: Starting crash recovery from" " checkpoint LSN=" LSN_PF, - log_sys.next_checkpoint_lsn); + log_sys.last_checkpoint_lsn); } } } @@ -4788,13 +4787,14 @@ inline void log_t::set_recovered() noexcept inline bool recv_sys_t::validate_checkpoint() const noexcept { - if (lsn >= file_checkpoint && lsn >= log_sys.next_checkpoint_lsn) + const lsn_t last_checkpoint_lsn{log_sys.last_checkpoint_lsn}; + if (lsn >= file_checkpoint && lsn >= last_checkpoint_lsn) return false; sql_print_error("InnoDB: The log was only scanned up to " LSN_PF ", while the current LSN at the " "time of the latest checkpoint " LSN_PF " was " LSN_PF "!", - lsn, log_sys.next_checkpoint_lsn, file_checkpoint); + lsn, last_checkpoint_lsn, file_checkpoint); return true; } @@ -4858,8 +4858,7 @@ dberr_t recv_recovery_from_checkpoint_start() if (log_sys.is_recoverable()) { const bool 
rewind = recv_sys.lsn - != log_sys.next_checkpoint_lsn; - log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn; + != log_sys.last_checkpoint_lsn; parser[false] = get_parse_mmap(); parser[true] = get_parse_mmap(); recv_scan_log(false, parser); @@ -4881,7 +4880,7 @@ dberr_t recv_recovery_from_checkpoint_start() ut_ad(recv_sys.file_checkpoint); ut_ad(log_sys.get_flushed_lsn() >= recv_sys.scanned_lsn); if (rewind) { - recv_sys.lsn = log_sys.next_checkpoint_lsn; + recv_sys.lsn = log_sys.last_checkpoint_lsn; recv_sys.offset = 0; recv_sys.len = 0; } @@ -4943,7 +4942,7 @@ dberr_t recv_recovery_from_checkpoint_start() mysql_mutex_lock(&recv_sys.mutex); ut_ad(log_sys.get_flushed_lsn() >= recv_sys.lsn); recv_sys.clear(); - recv_sys.lsn = log_sys.next_checkpoint_lsn; + recv_sys.lsn = log_sys.last_checkpoint_lsn; mysql_mutex_unlock(&recv_sys.mutex); } @@ -5167,7 +5166,7 @@ const byte *recv_dblwr_t::find_page(const page_id_t page_id, lsn_t max_lsn, continue; } - if (lsn > max_lsn || lsn < log_sys.next_checkpoint_lsn || + if (lsn > max_lsn || lsn < log_sys.last_checkpoint_lsn || !validate_page(page_id, max_lsn, space, page, tmp_buf)) { /* Mark processed for subsequent iterations in buf_dblwr_t::recover() */ diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index b99915dbb7d82..9dcdb86037ee9 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -245,6 +245,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn) we can clear the flag without risking any race condition with buf_page_t::read_complete(). */ recv_sys.recovery_on = false; + log_sys.set_check_for_checkpoint(); log_sys.latch.wr_unlock(); mysql_mutex_unlock(&buf_pool.flush_list_mutex); From 5020e63c2f310892abd16c4ae5cbcb2a47329d8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Sat, 7 Mar 2026 11:49:18 +0200 Subject: [PATCH 2/2] fixup! 
7e284b547a8b81f36e5f571d07e9721f192e994a Fix the compilation --- storage/innobase/log/log0recv.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index b20242fb8ef7b..9b2d2ce1e3acb 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2821,12 +2821,6 @@ log_parse_file(const page_id_t id, bool if_exists, ? "ignored" : recv_sys.file_checkpoint ? "reread" : "read", recv_sys.lsn); - DBUG_PRINT("ib_log", - ("FILE_CHECKPOINT(" LSN_PF ") %s at " LSN_PF, - c, c != log_sys.next_checkpoint_lsn - ? "ignored" : recv_sys.file_checkpoint ? "reread" : "read", - recv_sys.lsn)); - if (c == log_sys.last_checkpoint_lsn) { /* There can be multiple FILE_CHECKPOINT for the same LSN. */ @@ -4367,7 +4361,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) { recv_sys.set_corrupt_log(); sql_print_error("InnoDB: Missing FILE_CHECKPOINT(" LSN_PF - ") at " LSN_PF, log_sys.last_checkpoint_lsn, + ") at " LSN_PF, log_sys.last_checkpoint_lsn.load(), recv_sys.lsn); } mysql_mutex_unlock(&recv_sys.mutex); @@ -4392,7 +4386,7 @@ static bool recv_scan_log(bool last_phase, const recv_sys_t::parser *parser) } sql_print_information("InnoDB: Starting crash recovery from" " checkpoint LSN=" LSN_PF, - log_sys.last_checkpoint_lsn); + log_sys.last_checkpoint_lsn.load()); } } }