diff --git a/extra/mariabackup/innobackupex.cc b/extra/mariabackup/innobackupex.cc index 20f87354fd079..2d7c24db5a3d7 100644 --- a/extra/mariabackup/innobackupex.cc +++ b/extra/mariabackup/innobackupex.cc @@ -44,8 +44,8 @@ Street, Fifth Floor, Boston, MA 02110-1335 USA #include #include #include -#include #include +#include "buf0buf.h" #include #include #include @@ -608,8 +608,9 @@ static struct my_option ibx_long_options[] = "--apply-log.", (uchar*) &ibx_xtrabackup_use_memory, (uchar*) &ibx_xtrabackup_use_memory, - 0, GET_LL, REQUIRED_ARG, 100*1024*1024L, 1024*1024L, LONGLONG_MAX, 0, - 1024*1024L, 0}, + 0, GET_LL, REQUIRED_ARG, 96 << 20, + innodb_buffer_pool_extent_size, SIZE_T_MAX, 0, + innodb_buffer_pool_extent_size, 0}, {"innodb-force-recovery", OPT_INNODB_FORCE_RECOVERY, "This option starts up the embedded InnoDB instance in crash " diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index 163d82fa807c9..461bbd748b3ec 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -1284,8 +1284,9 @@ struct my_option xb_client_options[]= { "The value is used in place of innodb_buffer_pool_size. 
" "This option is only relevant when the --prepare option is specified.", (G_PTR *) &xtrabackup_use_memory, (G_PTR *) &xtrabackup_use_memory, 0, - GET_LL, REQUIRED_ARG, 100 * 1024 * 1024L, 1024 * 1024L, LONGLONG_MAX, 0, - 1024 * 1024L, 0}, + GET_ULL, REQUIRED_ARG, 96 << 20, innodb_buffer_pool_extent_size, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), + 0, innodb_buffer_pool_extent_size, 0}, {"throttle", OPT_XTRA_THROTTLE, "limit count of IO operations (pairs of read&write) per second to IOS " "values (for '--backup')", @@ -2327,7 +2328,7 @@ static bool innodb_init_param() } srv_sys_space.normalize_size(); - srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift); + srv_lock_table_size = 5 * buf_pool.curr_size(); /* -------------- Log files ---------------------------*/ @@ -2349,11 +2350,8 @@ static bool innodb_init_param() srv_adaptive_flushing = FALSE; - /* We set srv_pool_size here in units of 1 kB. InnoDB internally - changes the value so that it becomes the number of database pages. 
*/ - - srv_buf_pool_size = (ulint) xtrabackup_use_memory; - srv_buf_pool_chunk_unit = (ulong)srv_buf_pool_size; + buf_pool.size_in_bytes_max = size_t(xtrabackup_use_memory); + buf_pool.size_in_bytes_requested = buf_pool.size_in_bytes_max; srv_n_read_io_threads = (uint) innobase_read_io_threads; srv_n_write_io_threads = (uint) innobase_write_io_threads; diff --git a/include/my_sys.h b/include/my_sys.h index 4c371244ab966..15743358b3996 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -172,9 +172,15 @@ extern void my_free(void *ptr); extern void *my_memdup(PSI_memory_key key, const void *from,size_t length,myf MyFlags); extern char *my_strdup(PSI_memory_key key, const char *from,myf MyFlags); extern char *my_strndup(PSI_memory_key key, const char *from, size_t length, myf MyFlags); +extern my_bool my_use_large_pages; -int my_init_large_pages(my_bool super_large_pages); +int my_init_large_pages(void); uchar *my_large_malloc(size_t *size, myf my_flags); +#ifdef _WIN32 +/* On Windows, use my_virtual_mem_reserve() and my_virtual_mem_commit(). */ +#else +char *my_large_virtual_alloc(size_t *size); +#endif void my_large_free(void *ptr, size_t size); #ifdef _WIN32 diff --git a/include/my_virtual_mem.h b/include/my_virtual_mem.h new file mode 100644 index 0000000000000..b4f26ca979ca7 --- /dev/null +++ b/include/my_virtual_mem.h @@ -0,0 +1,37 @@ +/* Copyright (c) 2025, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#pragma once +/* + Functionality for handling virtual memory + (reserve, commit, decommit, release) +*/ +#include /*size_t*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 +char *my_virtual_mem_reserve(size_t *size); +#endif +char *my_virtual_mem_commit(char *ptr, size_t size); +void my_virtual_mem_decommit(char *ptr, size_t size); +void my_virtual_mem_release(char *ptr, size_t size); + +#ifdef __cplusplus +} +#endif + diff --git a/mysql-test/main/large_pages.opt b/mysql-test/main/large_pages.opt index 857c9c1ecee1d..bff2c2f689899 100644 --- a/mysql-test/main/large_pages.opt +++ b/mysql-test/main/large_pages.opt @@ -1 +1 @@ ---large-pages +--large-pages --loose-innodb-buffer-pool-size-max=16m diff --git a/mysql-test/main/large_pages.result b/mysql-test/main/large_pages.result index 9d03e646ddfd9..c5e73f044a9a7 100644 --- a/mysql-test/main/large_pages.result +++ b/mysql-test/main/large_pages.result @@ -1,4 +1,5 @@ call mtr.add_suppression("\\[Warning\\] (mysqld|mariadbd): Couldn't allocate [0-9]+ bytes \\((Large/HugeTLB memory|MEMLOCK) page size [0-9]+\\).*"); +call mtr.add_suppression("\\[ERROR\\]*Lock Pages in memory access rights required.*"); create table t1 ( a int not null auto_increment, b char(16) not null, diff --git a/mysql-test/main/large_pages.test b/mysql-test/main/large_pages.test index 136273a282118..7c0f497c6d31a 100644 --- a/mysql-test/main/large_pages.test +++ b/mysql-test/main/large_pages.test @@ -1,11 +1,9 @@ # Test of large pages (or at least the fallback to conventional allocation) -# Windows needs SeLockMemoryPrivilege ---source include/not_windows.inc --source include/have_innodb.inc call mtr.add_suppression("\\[Warning\\] (mysqld|mariadbd): Couldn't allocate [0-9]+ bytes \\((Large/HugeTLB memory|MEMLOCK) page size 
[0-9]+\\).*"); - +call mtr.add_suppression("\\[ERROR\\]*Lock Pages in memory access rights required.*"); create table t1 ( a int not null auto_increment, b char(16) not null, diff --git a/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt index 70797302d01e7..788e69a612dc4 100644 --- a/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt +++ b/mysql-test/suite/encryption/t/innodb_encrypt_temporary_tables.opt @@ -1,2 +1,2 @@ ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M --innodb_encrypt_temporary_tables=1 diff --git a/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result b/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result index 1d631198990b4..27d76630865b4 100644 --- a/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result +++ b/mysql-test/suite/galera/r/galera_sst_mariabackup_use_memory.result @@ -8,6 +8,6 @@ connection node_1; Cleaning var directory ... connection node_2; Starting server ... 
-include/assert_grep.inc [mariabackup: Using 128974848 bytes for buffer pool \(set by --use-memory parameter\)] +include/assert_grep.inc [mariabackup: Using 125829120 bytes for buffer pool \(set by --use-memory parameter\)] disconnect node_2; disconnect node_1; diff --git a/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf b/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf index 05a647eb09036..6efe3caca4cbc 100644 --- a/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf +++ b/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.cnf @@ -5,4 +5,4 @@ wsrep_sst_method=mariabackup wsrep_sst_auth="root:" [mariabackup] -use_memory=123m +use_memory=121m diff --git a/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test b/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test index f6ba873976ed8..b949e44ddf1ad 100644 --- a/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test +++ b/mysql-test/suite/galera/t/galera_sst_mariabackup_use_memory.test @@ -40,8 +40,8 @@ let $restart_noprint=2; --source include/wait_condition.inc # Confirm that IST did not take place ---let $assert_text = mariabackup: Using 128974848 bytes for buffer pool \(set by --use-memory parameter\) ---let $assert_select = mariabackup: Using 128974848 bytes for buffer pool \(set by --use-memory parameter\) +--let $assert_text = mariabackup: Using 125829120 bytes for buffer pool \(set by --use-memory parameter\) +--let $assert_select = mariabackup: Using 125829120 bytes for buffer pool \(set by --use-memory parameter\) --let $assert_count = 1 --let $assert_file = $MYSQLTEST_VARDIR/mysqld.2/data/mariabackup.prepare.log --let $assert_only_after = Starting InnoDB instance for recovery diff --git a/mysql-test/suite/innodb/r/buf_pool_resize_oom.result b/mysql-test/suite/innodb/r/buf_pool_resize_oom.result deleted file mode 100644 index 0bff75701a098..0000000000000 --- a/mysql-test/suite/innodb/r/buf_pool_resize_oom.result +++ /dev/null 
@@ -1,8 +0,0 @@ -# -# Bug #21348684 SIGABRT DURING RESIZING THE INNODB BUFFER POOL -# ONLINE WITH MEMORY FULL CONDITION -# -call mtr.add_suppression("InnoDB: failed to allocate the chunk array"); -SET GLOBAL debug_dbug='+d,buf_pool_resize_chunk_null'; -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + 1048576; -# restart diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result index 1a8f16e4efb20..ea053308b2e94 100644 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_fail.result @@ -1,4 +1,4 @@ -call mtr.add_suppression("InnoDB: Cannot allocate memory for the buffer pool"); +call mtr.add_suppression("InnoDB: Cannot map innodb_buffer_pool_size_max="); call mtr.add_suppression("InnoDB: Plugin initialization aborted at srv0start.cc.*"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); @@ -6,4 +6,4 @@ call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE faile # MDEV-25019 memory allocation failures during startup cause server failure in different, confusing ways # # restart: --debug_dbug=+d,ib_buf_chunk_init_fails -FOUND 1 /\[ERROR\] InnoDB: Cannot allocate memory for the buffer pool/ in mysqld.1.err +FOUND 1 /\[ERROR\] InnoDB: Cannot map innodb_buffer_pool_size_max=16m/ in mysqld.1.err diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result index 667d31a0b69fe..7b254daf4925d 100644 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize.result @@ -1,34 +1,60 @@ +# +# MDEV-29445: Reorganize buffer pool (and remove chunks) +# set global innodb_adaptive_hash_index=ON; select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 8388608 +set global 
innodb_buffer_pool_size = 9437184; set global innodb_buffer_pool_size = 10485760; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '10485760' select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size -16777216 -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; -set @`v_id` := 0; -set @`v_val` := 0; -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; +10485760 +create table t1 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED; +create table t2 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$kbs; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t2 SELECT seq*4,seq*4 FROM seq_1_to_16384; +SELECT @@GLOBAL.innodb_adaptive_hash_index; +@@GLOBAL.innodb_adaptive_hash_index +1 +SET STATEMENT max_statement_time=1e-9 FOR +SET GLOBAL innodb_buffer_pool_size = 7340032; +SELECT @@GLOBAL.innodb_adaptive_hash_index; +@@GLOBAL.innodb_adaptive_hash_index +1 +FOUND 1 /innodb_buffer_pool_size=7m.*resized from|innodb_buffer_pool_size change aborted/ in mysqld.1.err set global innodb_buffer_pool_size = 7340032; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '7340032' -select @@innodb_buffer_pool_size; -@@innodb_buffer_pool_size -8388608 select count(val) from t1; count(val) 262144 +select count(val) from t2; +count(val) +16384 set global innodb_adaptive_hash_index=OFF; -set global innodb_buffer_pool_size = 25165824; +set global 
innodb_buffer_pool_size = 24117248; +set global innodb_buffer_pool_size = 26214400; +Warnings: +Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '26214400' select @@innodb_buffer_pool_size; @@innodb_buffer_pool_size 25165824 select count(val) from t1; count(val) 262144 -drop table t1; -drop view view0; +select count(val) from t2; +count(val) +16384 +drop table t1,t2; +SET GLOBAL innodb_max_purge_lag_wait = 0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +SET GLOBAL innodb_buffer_pool_size = @old_innodb_buffer_pool_size; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result deleted file mode 100644 index d6b29060dc731..0000000000000 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_bigtest.result +++ /dev/null @@ -1,14 +0,0 @@ -SET @save_size=@@innodb_buffer_pool_size; -# -# MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize -# after or during DROP TABLE -# -select @@innodb_buffer_pool_chunk_size; -@@innodb_buffer_pool_chunk_size -1048576 -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; -SET GLOBAL innodb_buffer_pool_size=256*1024*1024; -DROP TABLE t1; -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size; -# End of 10.6 tests -SET GLOBAL innodb_buffer_pool_size=@save_size; diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result index 43d4802395805..12fb02a40c757 100644 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result +++ 
b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_temporary.result @@ -2,9 +2,44 @@ SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; SET @save_size=@@GLOBAL.innodb_buffer_pool_size; SET GLOBAL innodb_limit_optimistic_insert_debug=2; SET GLOBAL innodb_buffer_pool_size=16777216; +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); +SET @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; +SET GLOBAL innodb_adaptive_hash_index = ON; +SET STATEMENT debug_dbug='+d,buf_shrink_fail' FOR +SET GLOBAL innodb_buffer_pool_size=8388608; +ERROR HY000: innodb_buffer_pool_size change aborted +SELECT @@GLOBAL.innodb_adaptive_hash_index,@@GLOBAL.innodb_buffer_pool_size; +@@GLOBAL.innodb_adaptive_hash_index @@GLOBAL.innodb_buffer_pool_size +1 16777216 +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; CREATE TEMPORARY TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 SELECT seq FROM seq_1_to_200; +SET GLOBAL innodb_max_purge_lag_wait=0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status +connect con1,localhost,root; +SET DEBUG_SYNC='buf_pool_shrink_before_wakeup SIGNAL blocked WAIT_FOR go'; SET GLOBAL innodb_buffer_pool_size=8388608; +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status Withdrawing blocks. (505/505). 
+SET DEBUG_SYNC='now SIGNAL go'; +connection con1; +disconnect con1; +connection default; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +Variable_name Value +Innodb_buffer_pool_resize_status +SET DEBUG_SYNC=RESET; +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; SELECT COUNT(*),MIN(a),MAX(a) FROM t1; COUNT(*) MIN(a) MAX(a) 200 1 200 diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result deleted file mode 100644 index efb652091bf22..0000000000000 --- a/mysql-test/suite/innodb/r/innodb_buffer_pool_resize_with_chunks.result +++ /dev/null @@ -1,26 +0,0 @@ -select @@innodb_buffer_pool_chunk_size; -@@innodb_buffer_pool_chunk_size -4194304 -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; -set @`v_id` := 0; -set @`v_val` := 0; -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; -set global innodb_buffer_pool_size = 7340032; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '7340032' -select count(val) from t1; -count(val) -262144 -set global innodb_buffer_pool_size = 16777216; -select count(val) from t1; -count(val) -262144 -drop table t1; -drop view view0; -set global innodb_buffer_pool_size = 2*1048576; -Warnings: -Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '2097152' -select @@innodb_buffer_pool_size; -@@innodb_buffer_pool_size -4194304 diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_shrink.result 
b/mysql-test/suite/innodb/r/innodb_buffer_pool_shrink.result new file mode 100644 index 0000000000000..187dcfbd58754 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_shrink.result @@ -0,0 +1,11 @@ +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); +CREATE TABLE t (c INT) ENGINE=InnoDB PARTITION BY HASH(c) PARTITIONS 512; +BEGIN; +SELECT * FROM t LOCK IN SHARE MODE; +c +SET @save_size = @@GLOBAL.innodb_buffer_pool_size; +SET GLOBAL innodb_buffer_pool_size=6291456; +COMMIT; +SET GLOBAL innodb_buffer_pool_size=6291456; +SET GLOBAL innodb_buffer_pool_size = @save_size; +DROP TABLE t; diff --git a/mysql-test/suite/innodb/r/innodb_buffer_pool_shrink_temporary.result b/mysql-test/suite/innodb/r/innodb_buffer_pool_shrink_temporary.result new file mode 100644 index 0000000000000..bfaf8df7a2e95 --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_buffer_pool_shrink_temporary.result @@ -0,0 +1,17 @@ +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); +SET @b=REPEAT('0',1048576); +CREATE TEMPORARY TABLE t (c MEDIUMTEXT) ENGINE=InnoDB; +INSERT INTO t VALUES +(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b); +SET STATEMENT max_statement_time=0.000001 FOR +SET GLOBAL innodb_buffer_pool_size=6291456; +SET STATEMENT max_statement_time=0.000001 FOR +SET GLOBAL innodb_buffer_pool_size=6291456; +SET STATEMENT max_statement_time=0.000001 FOR +SET GLOBAL innodb_buffer_pool_size=6291456; +SET GLOBAL innodb_buffer_pool_size=6291456; +SET GLOBAL innodb_buffer_pool_size=16777216; +CHECKSUM TABLE t; +Table Checksum +test.t 4050893687 +DROP TEMPORARY TABLE t; diff --git a/mysql-test/suite/innodb/r/lock_memory_debug.result b/mysql-test/suite/innodb/r/lock_memory_debug.result index 36d7433974e75..9015080570795 100644 --- a/mysql-test/suite/innodb/r/lock_memory_debug.result +++ b/mysql-test/suite/innodb/r/lock_memory_debug.result @@ -5,7 +5,7 @@ call mtr.add_suppression("\\[Warning\\] InnoDB: Over 67 percent of the buffer po CREATE TABLE 
t1 (col1 INT) ENGINE=InnoDB; INSERT INTO t1 VALUES (1),(2),(3),(4),(5); SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR -INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000; +INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g; ERROR HY000: The total number of locks exceeds the lock table size SELECT COUNT(*) FROM t1; COUNT(*) diff --git a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result index 6ae7c84807f87..f33c0d0798e1a 100644 --- a/mysql-test/suite/innodb/r/log_upgrade_101_flags.result +++ b/mysql-test/suite/innodb/r/log_upgrade_101_flags.result @@ -1,7 +1,7 @@ call mtr.add_suppression("InnoDB: The change buffer is corrupted"); call mtr.add_suppression("InnoDB: Tablespace size stored in header is 768 pages, but the sum of data file sizes is 384 pages"); call mtr.add_suppression("InnoDB: adjusting FSP_SPACE_FLAGS of file"); -# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=10M +# restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/log_upgrade --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' AND support IN ('YES', 'DEFAULT', 'ENABLED'); diff --git a/mysql-test/suite/innodb/r/recovery_memory.result b/mysql-test/suite/innodb/r/recovery_memory.result index 9aba9bccdb3c3..372adbf4f9d3f 100644 --- a/mysql-test/suite/innodb/r/recovery_memory.result +++ b/mysql-test/suite/innodb/r/recovery_memory.result @@ -12,7 +12,7 @@ END LOOP connect con1,localhost,root,,,; CALL dorepeat(); connection default; -# restart: --innodb_buffer_pool_size=5242880 +# restart: --innodb_buffer_pool_size=6m 
DROP TABLE t1; DROP PROCEDURE dorepeat; # diff --git a/mysql-test/suite/innodb/r/restart,16k.rdiff b/mysql-test/suite/innodb/r/restart,16k.rdiff deleted file mode 100644 index 3149b9aeab05c..0000000000000 --- a/mysql-test/suite/innodb/r/restart,16k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '5242879' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 5242880 for innodb_page_size=16384 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '5242879' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,32k.rdiff b/mysql-test/suite/innodb/r/restart,32k.rdiff deleted file mode 100644 index 3f00646cb374e..0000000000000 --- a/mysql-test/suite/innodb/r/restart,32k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '10485759' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 10485760 for innodb_page_size=32768 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '10485759' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,4k.rdiff b/mysql-test/suite/innodb/r/restart,4k.rdiff deleted file mode 100644 index b00c56ef81f56..0000000000000 --- a/mysql-test/suite/innodb/r/restart,4k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '2097151' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 2097152 for innodb_page_size=4096 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '2097151' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,64k.rdiff b/mysql-test/suite/innodb/r/restart,64k.rdiff deleted file mode 100644 index 886cbcde7d96e..0000000000000 --- a/mysql-test/suite/innodb/r/restart,64k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '20971519' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 20971520 for innodb_page_size=65536 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '20971519' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart,8k.rdiff b/mysql-test/suite/innodb/r/restart,8k.rdiff deleted file mode 100644 index 40a9e1bad1c6d..0000000000000 --- a/mysql-test/suite/innodb/r/restart,8k.rdiff +++ /dev/null @@ -1,16 +0,0 @@ ---- ./suite/innodb/r/restart.result -+++ suite/innodb/r/restart.reject -@@ -32,10 +32,10 @@ - SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; - SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size -1); --ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of '3145727' - SHOW WARNINGS; - Level Code Message --Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE --Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -+Warning 1210 innodb_buffer_pool_size must be at least 3145728 for innodb_page_size=8192 -+Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of '3145727' - EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size); - SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; diff --git a/mysql-test/suite/innodb/r/restart.result b/mysql-test/suite/innodb/r/restart.result index 3e5e5f2952ba8..6374717c2a5ef 100644 --- a/mysql-test/suite/innodb/r/restart.result +++ b/mysql-test/suite/innodb/r/restart.result @@ -23,19 +23,6 @@ SELECT * FROM td; a DROP TABLE tr,tc,td; # -# MDEV-27467 innodb to enfore the minimum innodb_buffer_pool_size in SET (resize) the same as startup -# -SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; -SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); -ERROR 42000: Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -SHOW WARNINGS; -Level Code Message -Warning 1210 innodb_buffer_pool_size must be at least MIN_VAL for innodb_page_size=PAGE_SIZE -Error 1231 Variable 'innodb_buffer_pool_size' can't be set to the value of 'WRONG_VALUE' -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size); -SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; -# # MDEV-27882 Innodb - recognise MySQL-8.0 innodb flags and give a specific error message # FOUND 1 /InnoDB: MySQL-8\.0 tablespace in \./ibdata1/ in attempted_start.err diff --git a/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt b/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt deleted file mode 100644 index 09fd8bd8e35d2..0000000000000 --- a/mysql-test/suite/innodb/t/buf_pool_resize_oom.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-buffer-pool-size=8m --innodb-buffer-pool-chunk-size=1m diff --git a/mysql-test/suite/innodb/t/buf_pool_resize_oom.test b/mysql-test/suite/innodb/t/buf_pool_resize_oom.test deleted file mode 100644 index ea13129e8b1ce..0000000000000 --- a/mysql-test/suite/innodb/t/buf_pool_resize_oom.test +++ /dev/null @@ -1,27 +0,0 @@ ---source include/have_innodb.inc ---source include/have_debug.inc ---source include/not_embedded.inc - ---echo # ---echo # Bug #21348684 SIGABRT DURING RESIZING THE INNODB BUFFER POOL ---echo # ONLINE WITH MEMORY FULL CONDITION ---echo # - -call mtr.add_suppression("InnoDB: failed to allocate the chunk array"); - -SET GLOBAL debug_dbug='+d,buf_pool_resize_chunk_null'; - ---disable_warnings -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + 1048576; ---enable_warnings - -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 27) = 'Resizing buffer pool failed' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; - ---source include/wait_condition.inc -# Restart the server, because the buffer pool would not necessarily be -# shrunk afterwards even if we request it. 
---source include/restart_mysqld.inc diff --git a/mysql-test/suite/innodb/t/innodb-index-online.opt b/mysql-test/suite/innodb/t/innodb-index-online.opt index ff20edbe2f74c..885ca8f219281 100644 --- a/mysql-test/suite/innodb/t/innodb-index-online.opt +++ b/mysql-test/suite/innodb/t/innodb-index-online.opt @@ -1,6 +1,6 @@ --loose-innodb-sort-buffer-size=64k --loose-innodb-online-alter-log-max-size=128k ---loose-innodb-buffer-pool-size=5M +--loose-innodb-buffer-pool-size=6M --loose-innodb-log-buffer-size=256k --loose-innodb-sys-indexes --loose-innodb-sys-fields diff --git a/mysql-test/suite/innodb/t/innodb-table-online-master.opt b/mysql-test/suite/innodb/t/innodb-table-online-master.opt index 92eea2b0d2ebe..33ca35c2118c4 100644 --- a/mysql-test/suite/innodb/t/innodb-table-online-master.opt +++ b/mysql-test/suite/innodb/t/innodb-table-online-master.opt @@ -1 +1 @@ ---innodb-sort-buffer-size=64k --innodb-online-alter-log-max-size=512k --innodb-buffer-pool-size=5M --innodb-log-buffer-size=256k +--innodb-sort-buffer-size=64k --innodb-online-alter-log-max-size=512k --innodb-buffer-pool-size=6M --innodb-log-buffer-size=256k diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt new file mode 100644 index 0000000000000..95f86e5992082 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=16m diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test index e8e070c506191..f082e7d7fd4f1 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_fail.test @@ -1,6 +1,6 @@ --source include/have_innodb.inc --source include/have_debug.inc -call mtr.add_suppression("InnoDB: Cannot allocate memory for the buffer pool"); +call mtr.add_suppression("InnoDB: Cannot map innodb_buffer_pool_size_max="); call mtr.add_suppression("InnoDB: 
Plugin initialization aborted at srv0start.cc.*"); call mtr.add_suppression("Plugin 'InnoDB' init function returned error."); call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); @@ -10,5 +10,5 @@ call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE faile let restart_parameters=--debug_dbug=+d,ib_buf_chunk_init_fails; --source include/restart_mysqld.inc let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; -let SEARCH_PATTERN=\[ERROR\] InnoDB: Cannot allocate memory for the buffer pool; +let SEARCH_PATTERN=\[ERROR\] InnoDB: Cannot map innodb_buffer_pool_size_max=16m; --source include/search_pattern_in_file.inc diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt index 39543543a5366..19074aa9024aa 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.opt @@ -1,2 +1,3 @@ --innodb-buffer-pool-size=8M +--innodb-buffer-pool-size-max=24M --innodb-page-size=4k diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test index 932829470e3e1..e48be6b43fa15 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize.test @@ -1,17 +1,14 @@ -# -# WL6117 : Resize the InnoDB Buffer Pool Online -# - --source include/have_innodb.inc ---source include/big_test.inc +--source include/have_sequence.inc +--source include/not_embedded.inc # there are no messages in mysqld.1.err -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 34) = 'Completed resizing buffer pool at ' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; +--echo # +--echo # MDEV-29445: Reorganize buffer pool (and remove chunks) +--echo # --disable_query_log +call mtr.add_suppression("InnoDB: Over 67 
percent of the buffer pool is occupied by lock heaps"); +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); set @old_innodb_buffer_pool_size = @@innodb_buffer_pool_size; set @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; --enable_query_log @@ -21,52 +18,74 @@ set global innodb_adaptive_hash_index=ON; select @@innodb_buffer_pool_size; # Expand buffer pool +set global innodb_buffer_pool_size = 9437184; set global innodb_buffer_pool_size = 10485760; ---source include/wait_condition.inc - select @@innodb_buffer_pool_size; +let $kbs=`SELECT CAST(@@innodb_page_size / 1024 AS INT)`; # fill buffer pool --disable_query_log SET @save_innodb_read_only_compressed=@@GLOBAL.innodb_read_only_compressed; SET GLOBAL innodb_read_only_compressed=OFF; --enable_query_log -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; +create table t1 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED; +evalp create table t2 (id int primary key, val int not null) +ENGINE=InnoDB ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=$kbs; -set @`v_id` := 0; -set @`v_val` := 0; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t1 SELECT seq*4,seq*4 FROM seq_1_to_262144; +SET STATEMENT foreign_key_checks=0, unique_checks=0 FOR +INSERT INTO t2 SELECT seq*4,seq*4 FROM seq_1_to_16384; -# 2^18 == 262144 records -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; --disable_query_log SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; --enable_query_log -# Shrink buffer pool -set global innodb_buffer_pool_size = 7340032; ---source 
include/wait_condition.inc +SELECT @@GLOBAL.innodb_adaptive_hash_index; +--error 0,ER_WRONG_USAGE +SET STATEMENT max_statement_time=1e-9 FOR +SET GLOBAL innodb_buffer_pool_size = 7340032; +SELECT @@GLOBAL.innodb_adaptive_hash_index; -select @@innodb_buffer_pool_size; +--let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err +--let SEARCH_PATTERN= InnoDB: Trying to shrink innodb_buffer_pool_size=7m +--let SEARCH_PATTERN= innodb_buffer_pool_size=7m.*resized from|innodb_buffer_pool_size change aborted +--source include/search_pattern_in_file.inc + +# Attempt to shrink the buffer pool. This may occasionally fail. +--error 0,ER_WRONG_USAGE +set global innodb_buffer_pool_size = 7340032; select count(val) from t1; +select count(val) from t2; set global innodb_adaptive_hash_index=OFF; -# Expand buffer pool to 24MB -set global innodb_buffer_pool_size = 25165824; ---source include/wait_condition.inc +# Expand buffer pool to 23 and then 24 MiB (requesting 25 MiB) +set global innodb_buffer_pool_size = 24117248; +set global innodb_buffer_pool_size = 26214400; select @@innodb_buffer_pool_size; select count(val) from t1; +select count(val) from t2; -drop table t1; -drop view view0; +drop table t1,t2; ---disable_query_log -set global innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; -set global innodb_buffer_pool_size = @old_innodb_buffer_pool_size; ---enable_query_log +SET GLOBAL innodb_max_purge_lag_wait = 0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; + +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; +let $wait_condition = +SELECT variable_value = 0 +FROM information_schema.global_status +WHERE variable_name = 'INNODB_BUFFER_POOL_PAGES_DIRTY'; --source include/wait_condition.inc +SET GLOBAL innodb_buffer_pool_size = @old_innodb_buffer_pool_size; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; +SET GLOBAL 
innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt deleted file mode 100644 index 72f055d3b58ae..0000000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.opt +++ /dev/null @@ -1,2 +0,0 @@ ---innodb-buffer-pool-chunk-size=1M ---loose-skip-innodb-disable-resize_buffer_pool_debug diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test deleted file mode 100644 index db5da2924fa98..0000000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_bigtest.test +++ /dev/null @@ -1,28 +0,0 @@ ---source include/have_innodb.inc ---source include/big_test.inc - -SET @save_size=@@innodb_buffer_pool_size; - -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; - ---echo # ---echo # MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize ---echo # after or during DROP TABLE ---echo # - -select @@innodb_buffer_pool_chunk_size; -CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; -SET GLOBAL innodb_buffer_pool_size=256*1024*1024; -DROP TABLE t1; ---source include/wait_condition.inc -SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size; ---source include/wait_condition.inc - ---echo # End of 10.6 tests - -SET GLOBAL innodb_buffer_pool_size=@save_size; ---source include/wait_condition.inc diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt deleted file mode 100644 index dca040ea893db..0000000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_debug.opt +++ /dev/null @@ -1 
+0,0 @@ ---innodb-buffer-pool-size=8M --innodb-buffer-pool-chunk-size=2M diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt new file mode 100644 index 0000000000000..95f86e5992082 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=16m diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test index 1cdf4a318d024..f96f89c39d566 100644 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_temporary.test @@ -1,24 +1,53 @@ --source include/have_innodb.inc --source include/have_sequence.inc --source include/have_debug.inc +--source include/have_debug_sync.inc SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; SET @save_size=@@GLOBAL.innodb_buffer_pool_size; SET GLOBAL innodb_limit_optimistic_insert_debug=2; - SET GLOBAL innodb_buffer_pool_size=16777216; +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); + +SET @old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; +SET GLOBAL innodb_adaptive_hash_index = ON; +--error ER_WRONG_USAGE +SET STATEMENT debug_dbug='+d,buf_shrink_fail' FOR +SET GLOBAL innodb_buffer_pool_size=8388608; +SELECT @@GLOBAL.innodb_adaptive_hash_index,@@GLOBAL.innodb_buffer_pool_size; +SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; + CREATE TEMPORARY TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB; INSERT INTO t1 SELECT seq FROM seq_1_to_200; -SET GLOBAL innodb_buffer_pool_size=8388608; +# Flush the buffer pool to prevent +# "innodb_buffer_pool_size change aborted" error with ./mtr --repeat=3 +SET GLOBAL innodb_max_purge_lag_wait=0; +SET @save_pct= @@GLOBAL.innodb_max_dirty_pages_pct; +SET @save_pct_lwm= @@GLOBAL.innodb_max_dirty_pages_pct_lwm; +SET GLOBAL 
innodb_max_dirty_pages_pct_lwm = 0.0; +SET GLOBAL innodb_max_dirty_pages_pct = 0.0; + +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +connect con1,localhost,root; +SET DEBUG_SYNC='buf_pool_shrink_before_wakeup SIGNAL blocked WAIT_FOR go'; +send SET GLOBAL innodb_buffer_pool_size=8388608; +connection default; +SET DEBUG_SYNC='now WAIT_FOR blocked'; +# adjust for 32-bit and SUX_LOCK_GENERIC +--replace_regex /(5..)\/\1/505\/505/ +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +SET DEBUG_SYNC='now SIGNAL go'; +connection con1; +reap; +disconnect con1; +connection default; +SHOW STATUS LIKE 'innodb_buffer_pool_resize_status'; +SET DEBUG_SYNC=RESET; -let $wait_timeout = 60; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 34) = 'Completed resizing buffer pool at ' - FROM information_schema.global_status - WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS'; ---source include/wait_condition.inc +SET GLOBAL innodb_max_dirty_pages_pct = @save_pct; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = @save_pct_lwm; SELECT COUNT(*),MIN(a),MAX(a) FROM t1; DROP TEMPORARY TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt deleted file mode 100644 index ade197de33801..0000000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.opt +++ /dev/null @@ -1,3 +0,0 @@ ---innodb-buffer-pool-size=16M ---innodb-buffer-pool-chunk-size=4M ---innodb-page-size=4k diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test deleted file mode 100644 index d11443ca1b220..0000000000000 --- a/mysql-test/suite/innodb/t/innodb_buffer_pool_resize_with_chunks.test +++ /dev/null @@ -1,61 +0,0 @@ -# -# WL6117 : Resize the InnoDB Buffer Pool Online -# (innodb_buffer_pool_chunk_size used case) -# - ---source include/have_innodb.inc ---source include/big_test.inc - 
-let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 34) = 'Completed resizing buffer pool at ' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - ---disable_query_log -set @old_innodb_buffer_pool_size = @@innodb_buffer_pool_size; ---enable_query_log - -select @@innodb_buffer_pool_chunk_size; - -# fill buffer pool ---disable_query_log -SET @save_innodb_read_only_compressed=@@GLOBAL.innodb_read_only_compressed; -SET GLOBAL innodb_read_only_compressed=OFF; ---enable_query_log -create table t1 (id int not null, val int not null default '0', primary key (id)) ENGINE=InnoDB ROW_FORMAT=COMPRESSED; -create or replace view view0 as select 1 union all select 1; - -set @`v_id` := 0; -set @`v_val` := 0; - -# 2^18 == 262144 records -replace into t1 select (@`v_id` := (@`v_id` + 4) mod 4294967296) as id, (@`v_val` := (@`v_val` + 4) mod 4294967296) as val from view0 v0, view0 v1, view0 v2, view0 v3, view0 v4, view0 v5, view0 v6, view0 v7, view0 v8, view0 v9, view0 v10, view0 v11, view0 v12, view0 v13, view0 v14, view0 v15, view0 v16, view0 v17; ---disable_query_log -SET GLOBAL innodb_read_only_compressed=@save_innodb_read_only_compressed; ---enable_query_log - -# Shrink buffer pool to 7MB -set global innodb_buffer_pool_size = 7340032; ---source include/wait_condition.inc - -select count(val) from t1; - -# Expand buffer pool to 16MB -set global innodb_buffer_pool_size = 16777216; ---source include/wait_condition.inc - -select count(val) from t1; - -drop table t1; -drop view view0; - -# Try to shrink buffer pool to smaller than chunk size -set global innodb_buffer_pool_size = 2*1048576; ---source include/wait_condition.inc -select @@innodb_buffer_pool_size; - ---disable_query_log -set global innodb_buffer_pool_size = @old_innodb_buffer_pool_size; ---enable_query_log ---source include/wait_condition.inc diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink.test 
b/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink.test new file mode 100644 index 0000000000000..886e31955c686 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink.test @@ -0,0 +1,14 @@ +--source include/have_innodb.inc +--source include/have_partition.inc +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); +CREATE TABLE t (c INT) ENGINE=InnoDB PARTITION BY HASH(c) PARTITIONS 512; +BEGIN; +SELECT * FROM t LOCK IN SHARE MODE; +SET @save_size = @@GLOBAL.innodb_buffer_pool_size; +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_buffer_pool_size=6291456; +COMMIT; +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_buffer_pool_size=6291456; +SET GLOBAL innodb_buffer_pool_size = @save_size; +DROP TABLE t; diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink_temporary.opt b/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink_temporary.opt new file mode 100644 index 0000000000000..d8ba7cf7b0ff8 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink_temporary.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size=16m diff --git a/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink_temporary.test b/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink_temporary.test new file mode 100644 index 0000000000000..cf2ea4ad17539 --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_buffer_pool_shrink_temporary.test @@ -0,0 +1,20 @@ +--source include/have_innodb.inc +call mtr.add_suppression("innodb_buffer_pool_size change aborted"); +SET @b=REPEAT('0',1048576); +CREATE TEMPORARY TABLE t (c MEDIUMTEXT) ENGINE=InnoDB; +INSERT INTO t VALUES +(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b),(@b); +--error 0,ER_WRONG_USAGE +SET STATEMENT max_statement_time=0.000001 FOR +SET GLOBAL innodb_buffer_pool_size=6291456; +--error 0,ER_WRONG_USAGE +SET STATEMENT max_statement_time=0.000001 FOR +SET GLOBAL innodb_buffer_pool_size=6291456; +--error 0,ER_WRONG_USAGE +SET STATEMENT max_statement_time=0.000001 FOR +SET GLOBAL 
innodb_buffer_pool_size=6291456; +--error 0,ER_WRONG_USAGE +SET GLOBAL innodb_buffer_pool_size=6291456; +SET GLOBAL innodb_buffer_pool_size=16777216; +CHECKSUM TABLE t; +DROP TEMPORARY TABLE t; diff --git a/mysql-test/suite/innodb/t/lock_memory_debug.opt b/mysql-test/suite/innodb/t/lock_memory_debug.opt index 67c8423cf2a50..184ec4096a8e5 100644 --- a/mysql-test/suite/innodb/t/lock_memory_debug.opt +++ b/mysql-test/suite/innodb/t/lock_memory_debug.opt @@ -1 +1 @@ ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M diff --git a/mysql-test/suite/innodb/t/lock_memory_debug.test b/mysql-test/suite/innodb/t/lock_memory_debug.test index 588356f7fa417..58a76740dcb0b 100644 --- a/mysql-test/suite/innodb/t/lock_memory_debug.test +++ b/mysql-test/suite/innodb/t/lock_memory_debug.test @@ -15,7 +15,7 @@ INSERT INTO t1 VALUES (1),(2),(3),(4),(5); --error ER_LOCK_TABLE_FULL SET STATEMENT debug_dbug='+d,innodb_skip_lock_bitmap' FOR -INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g LIMIT 45000; +INSERT INTO t1 SELECT a.* FROM t1 a, t1 b, t1 c, t1 d, t1 e, t1 f, t1 g; SELECT COUNT(*) FROM t1; diff --git a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test index 4358ccfa1ca21..a9b567bfbb32c 100644 --- a/mysql-test/suite/innodb/t/log_upgrade_101_flags.test +++ b/mysql-test/suite/innodb/t/log_upgrade_101_flags.test @@ -73,7 +73,7 @@ print OUT chr(0); close OUT or die; EOF ---let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=10M +--let $restart_parameters= $dirs --innodb-force-recovery=5 --innodb-log-file-size=4m --innodb_page_size=32k --innodb_buffer_pool_size=11M --source include/start_mysqld.inc SELECT COUNT(*) FROM INFORMATION_SCHEMA.ENGINES WHERE engine = 'innodb' diff --git a/mysql-test/suite/innodb/t/mdev-15707.opt b/mysql-test/suite/innodb/t/mdev-15707.opt index fec3463c6d084..6add146aec929 100644 --- 
a/mysql-test/suite/innodb/t/mdev-15707.opt +++ b/mysql-test/suite/innodb/t/mdev-15707.opt @@ -1 +1 @@ ---innodb --innodb-buffer-pool-size=5MB --innodb-read-io-threads=1 --innodb-doublewrite=0 --innodb-flush-log-at-trx-commit=0 \ No newline at end of file +--innodb --innodb-buffer-pool-size=6MB --innodb-read-io-threads=1 --innodb-doublewrite=0 --innodb-flush-log-at-trx-commit=0 \ No newline at end of file diff --git a/mysql-test/suite/innodb/t/purge_secondary.opt b/mysql-test/suite/innodb/t/purge_secondary.opt index 2821c98397c15..fd3b12811e8db 100644 --- a/mysql-test/suite/innodb/t/purge_secondary.opt +++ b/mysql-test/suite/innodb/t/purge_secondary.opt @@ -1,4 +1,4 @@ --innodb-sys-tablestats ---innodb_buffer_pool_size=5M +--innodb_buffer_pool_size=6M --innodb_monitor_enable=module_buffer --skip-innodb-stats-persistent diff --git a/mysql-test/suite/innodb/t/recovery_memory.test b/mysql-test/suite/innodb/t/recovery_memory.test index 06101377f1040..51c0ce73b78eb 100644 --- a/mysql-test/suite/innodb/t/recovery_memory.test +++ b/mysql-test/suite/innodb/t/recovery_memory.test @@ -22,7 +22,7 @@ send CALL dorepeat(); connection default; sleep 10; let $shutdown_timeout=0; -let $restart_parameters=--innodb_buffer_pool_size=5242880; +let $restart_parameters=--innodb_buffer_pool_size=6m; --source include/restart_mysqld.inc DROP TABLE t1; DROP PROCEDURE dorepeat; @@ -33,11 +33,11 @@ DROP PROCEDURE dorepeat; --echo # if ($have_debug) { SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; -let $restart_parameters=--innodb_buffer_pool_size=5242880 --debug_dbug=+d,ibuf_init_corrupt; +let $restart_parameters=--innodb_buffer_pool_size=6m --debug_dbug=+d,ibuf_init_corrupt; } if (!$have_debug) { --echo SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; -let $restart_parameters=--innodb_buffer_pool_size=5242880; +let $restart_parameters=--innodb_buffer_pool_size=6m; } CREATE TABLE t1(f1 INT NOT NULL)ENGINE=InnoDB; INSERT INTO t1 SELECT * FROM seq_1_to_65536; diff --git 
a/mysql-test/suite/innodb/t/restart.opt b/mysql-test/suite/innodb/t/restart.opt deleted file mode 100644 index ce43e89cb2bf8..0000000000000 --- a/mysql-test/suite/innodb/t/restart.opt +++ /dev/null @@ -1,2 +0,0 @@ ---loose-innodb_disable_resize_buffer_pool_debug=0 ---innodb-buffer-pool-chunk-size=1M diff --git a/mysql-test/suite/innodb/t/restart.test b/mysql-test/suite/innodb/t/restart.test index d9a97f05eb311..73dacbc9b866d 100644 --- a/mysql-test/suite/innodb/t/restart.test +++ b/mysql-test/suite/innodb/t/restart.test @@ -83,31 +83,6 @@ SELECT * FROM tc; SELECT * FROM td; DROP TABLE tr,tc,td; ---echo # ---echo # MDEV-27467 innodb to enfore the minimum innodb_buffer_pool_size in SET (resize) the same as startup ---echo # - -let $wait_timeout = 180; -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 34) = 'Completed resizing buffer pool at ' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - ---disable_cursor_protocol -SELECT @@innodb_buffer_pool_size INTO @innodb_buffer_pool_size_orig; -SELECT CEILING((256 + 64) * @@innodb_page_size / 1048576) * 1048576 INTO @min_pool_size; ---enable_cursor_protocol ---error ER_WRONG_VALUE_FOR_VAR -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' USING (@min_pool_size -1); - -SHOW WARNINGS; - -EXECUTE IMMEDIATE 'SET GLOBAL innodb_buffer_pool_size = ?' 
USING (@min_pool_size); - ---source include/wait_condition.inc - -SET GLOBAL innodb_buffer_pool_size = @innodb_buffer_pool_size_orig; - --echo # --echo # MDEV-27882 Innodb - recognise MySQL-8.0 innodb flags and give a specific error message --echo # diff --git a/mysql-test/suite/innodb/t/update_time-master.opt b/mysql-test/suite/innodb/t/update_time-master.opt deleted file mode 100644 index f0fd647546d9b..0000000000000 --- a/mysql-test/suite/innodb/t/update_time-master.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-buffer-pool-size=5M diff --git a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result index e5dd6820420d8..6e4dad9012708 100644 --- a/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_buffer_pool_size_basic.result @@ -1,16 +1,17 @@ SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; -'#---------------------BS_STVARS_022_01----------------------#' -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) -1 -1 Expected '#---------------------BS_STVARS_022_02----------------------#' -SET @@GLOBAL.innodb_buffer_pool_size=10485760; -Expected succeeded -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) +SELECT @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size_max +8388608 +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max +1 +SET GLOBAL innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max + 1048576; +Warnings: +Warning 1292 Truncated incorrect innodb_buffer_pool_size value: '9437184' +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +@@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max 1 -1 Expected '#---------------------BS_STVARS_022_03----------------------#' SELECT 
@@GLOBAL.innodb_buffer_pool_size = VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -18,10 +19,6 @@ WHERE VARIABLE_NAME='innodb_buffer_pool_size'; @@GLOBAL.innodb_buffer_pool_size = VARIABLE_VALUE 1 1 Expected -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); -COUNT(@@GLOBAL.innodb_buffer_pool_size) -1 -1 Expected SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES WHERE VARIABLE_NAME='innodb_buffer_pool_size'; @@ -50,4 +47,7 @@ COUNT(@@GLOBAL.innodb_buffer_pool_size) 1 Expected SELECT innodb_buffer_pool_size = @@SESSION.innodb_buffer_pool_size; ERROR 42S22: Unknown column 'innodb_buffer_pool_size' in 'SELECT' -# restart +SET GLOBAL innodb_buffer_pool_size = @start_buffer_pool_size; +SELECT @@innodb_buffer_pool_size = @start_buffer_pool_size; +@@innodb_buffer_pool_size = @start_buffer_pool_size +1 diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff index 531bae3fbdde2..73b9b0b4c4dcb 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff @@ -1,4 +1,6 @@ -@@ -49,7 +49,7 @@ +--- sysvars_innodb.result ++++ sysvars_innodb.result,32bit +@@ -48,7 +48,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 8 VARIABLE_SCOPE GLOBAL @@ -7,7 +9,7 @@ VARIABLE_COMMENT Number of InnoDB Adaptive Hash Index Partitions (default 8) NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 512 -@@ -73,7 +73,7 @@ +@@ -72,7 +72,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -16,20 +18,20 @@ VARIABLE_COMMENT The AUTOINC lock modes supported by InnoDB: 0 => Old style AUTOINC locking (for backward compatibility); 1 => New style AUTOINC locking; 2 => No AUTOINC locking (unsafe for SBR) NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -85,10 +85,10 @@ +@@ -84,10 +84,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 134217728 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED - VARIABLE_COMMENT Size of a single memory chunk for 
resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means disable resizing buffer pool. + VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 1048576 -NUMERIC_MAX_VALUE 9223372036854775807 +NUMERIC_MAX_VALUE 2147483647 NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -121,7 +121,7 @@ +@@ -120,7 +120,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 25 VARIABLE_SCOPE GLOBAL @@ -38,7 +40,20 @@ VARIABLE_COMMENT Dump only the hottest N% of each buffer pool, defaults to 25 NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 100 -@@ -205,7 +205,7 @@ +@@ -192,10 +192,10 @@ + SESSION_VALUE NULL + DEFAULT_VALUE 134217728 + VARIABLE_SCOPE GLOBAL +-VARIABLE_TYPE BIGINT UNSIGNED ++VARIABLE_TYPE INT UNSIGNED + VARIABLE_COMMENT The size of the memory buffer InnoDB uses to cache data and indexes of its tables. + NUMERIC_MIN_VALUE 2097152 +-NUMERIC_MAX_VALUE 18446744073701163008 ++NUMERIC_MAX_VALUE 4292870144 + NUMERIC_BLOCK_SIZE 1048576 + ENUM_VALUE_LIST NULL + READ_ONLY NO +@@ -204,7 +204,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -47,7 +62,7 @@ VARIABLE_COMMENT A number between [0, 100] that tells how oftern buffer pool dump status in percentages should be printed. E.g. 10 means that buffer pool dump status is printed when every 10% of number of buffer pool pages are dumped. Default is 0 (only start and end status is printed). NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -325,7 +325,7 @@ +@@ -324,7 +324,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 5 VARIABLE_SCOPE GLOBAL @@ -56,7 +71,7 @@ VARIABLE_COMMENT If the compression failure rate of a table is greater than this number more padding is added to the pages to reduce the failures. 
A value of zero implies no padding NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 100 -@@ -349,7 +349,7 @@ +@@ -348,7 +348,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 50 VARIABLE_SCOPE GLOBAL @@ -65,7 +80,7 @@ VARIABLE_COMMENT Percentage of empty space on a data page that can be reserved to make the page compressible. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 75 -@@ -649,7 +649,7 @@ +@@ -636,7 +636,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 600 VARIABLE_SCOPE GLOBAL @@ -74,7 +89,7 @@ VARIABLE_COMMENT Maximum number of seconds that semaphore times out in InnoDB. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 4294967295 -@@ -697,7 +697,7 @@ +@@ -684,7 +684,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -83,7 +98,7 @@ VARIABLE_COMMENT Number of iterations over which the background flushing is averaged. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 1000 -@@ -721,7 +721,7 @@ +@@ -708,7 +708,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -92,7 +107,7 @@ VARIABLE_COMMENT Controls the durability/speed trade-off for commits. Set to 0 (write and flush redo log to disk only once per second), 1 (flush to disk at each commit), 2 (write to log at commit but flush to disk only once per second) or 3 (flush to disk at prepare and at commit, slower and usually redundant). 1 and 3 guarantees that after a crash, committed transactions will not be lost and will be consistent with the binlog and other transactional engines. 2 can get inconsistent and lose transactions if there is a power failure or kernel crash but not if mysqld crashes. 0 has no guarantees in case of crash. 0 and 2 can be faster than 1 or 3. 
NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 3 -@@ -745,7 +745,7 @@ +@@ -732,7 +732,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1 VARIABLE_SCOPE GLOBAL @@ -101,7 +116,7 @@ VARIABLE_COMMENT Set to 0 (don't flush neighbors from buffer pool), 1 (flush contiguous neighbors from buffer pool) or 2 (flush neighbors from buffer pool), when flushing a block NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 2 -@@ -781,7 +781,7 @@ +@@ -768,7 +768,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -110,7 +125,7 @@ VARIABLE_COMMENT Helps to save your data in case the disk image of the database becomes corrupt. Value 5 can return bogus data, and 6 can permanently corrupt data. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 6 -@@ -805,10 +805,10 @@ +@@ -792,10 +792,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 8000000 VARIABLE_SCOPE GLOBAL @@ -123,7 +138,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -841,7 +841,7 @@ +@@ -828,7 +828,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 84 VARIABLE_SCOPE GLOBAL @@ -132,16 +147,16 @@ VARIABLE_COMMENT InnoDB Fulltext search maximum token size in characters NUMERIC_MIN_VALUE 10 NUMERIC_MAX_VALUE 84 -@@ -853,7 +853,7 @@ +@@ -840,7 +840,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 3 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT InnoDB Fulltext search minimum token size in characters - NUMERIC_MIN_VALUE 0 + NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 16 -@@ -865,7 +865,7 @@ +@@ -852,7 +852,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000 VARIABLE_SCOPE GLOBAL @@ -150,7 +165,7 @@ VARIABLE_COMMENT InnoDB Fulltext search number of words to optimize for each optimize table call NUMERIC_MIN_VALUE 1000 NUMERIC_MAX_VALUE 10000 -@@ -877,10 +877,10 @@ +@@ -864,10 +864,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 2000000000 VARIABLE_SCOPE GLOBAL @@ -163,7 +178,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -901,7 +901,7 @@ +@@ -888,7 +888,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 2 VARIABLE_SCOPE GLOBAL @@ -172,7 +187,7 @@ 
VARIABLE_COMMENT InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 16 -@@ -913,10 +913,10 @@ +@@ -900,10 +900,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 640000000 VARIABLE_SCOPE GLOBAL @@ -185,7 +200,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -961,22 +961,22 @@ +@@ -948,22 +948,22 @@ SESSION_VALUE NULL DEFAULT_VALUE 200 VARIABLE_SCOPE GLOBAL @@ -213,7 +228,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1009,10 +1009,10 @@ +@@ -996,10 +996,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 16777216 VARIABLE_SCOPE GLOBAL @@ -226,7 +241,7 @@ NUMERIC_BLOCK_SIZE 1024 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1057,7 +1057,7 @@ +@@ -1044,7 +1044,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 8192 VARIABLE_SCOPE GLOBAL @@ -235,7 +250,7 @@ VARIABLE_COMMENT Redo log write ahead unit size to avoid read-on-write, it should match the OS cache block IO size NUMERIC_MIN_VALUE 512 NUMERIC_MAX_VALUE 16384 -@@ -1069,10 +1069,10 @@ +@@ -1056,10 +1056,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 32 VARIABLE_SCOPE GLOBAL @@ -248,7 +263,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1081,10 +1081,10 @@ +@@ -1068,10 +1068,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 1536 VARIABLE_SCOPE GLOBAL @@ -261,7 +276,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1129,10 +1129,10 @@ +@@ -1104,10 +1104,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -274,7 +289,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1141,7 +1141,7 @@ +@@ -1116,7 +1116,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -283,7 +298,7 @@ VARIABLE_COMMENT Maximum delay of user threads in micro-seconds NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 10000000 -@@ -1273,10 +1273,10 @@ +@@ -1248,10 +1248,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL @@ -296,7 +311,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY YES -@@ -1309,7 +1309,7 @@ +@@ -1272,7 +1272,7 
@@ SESSION_VALUE NULL DEFAULT_VALUE 16384 VARIABLE_SCOPE GLOBAL @@ -305,16 +320,16 @@ VARIABLE_COMMENT Page size to use for all InnoDB tablespaces. NUMERIC_MIN_VALUE 4096 NUMERIC_MAX_VALUE 65536 -@@ -1345,7 +1345,7 @@ +@@ -1308,7 +1308,7 @@ SESSION_VALUE NULL - DEFAULT_VALUE 1000 + DEFAULT_VALUE 127 VARIABLE_SCOPE GLOBAL -VARIABLE_TYPE BIGINT UNSIGNED +VARIABLE_TYPE INT UNSIGNED VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list. NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 5000 -@@ -1357,7 +1357,7 @@ +@@ -1320,7 +1320,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 128 VARIABLE_SCOPE GLOBAL @@ -323,7 +338,7 @@ VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 1 NUMERIC_MAX_VALUE 128 -@@ -1393,7 +1393,7 @@ +@@ -1356,7 +1356,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 56 VARIABLE_SCOPE GLOBAL @@ -332,7 +347,7 @@ VARIABLE_COMMENT Number of pages that must be accessed sequentially for InnoDB to trigger a readahead. NUMERIC_MIN_VALUE 0 NUMERIC_MAX_VALUE 64 -@@ -1465,7 +1465,7 @@ +@@ -1440,7 +1440,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 1048576 VARIABLE_SCOPE GLOBAL @@ -341,7 +356,7 @@ VARIABLE_COMMENT Memory buffer size for index creation NUMERIC_MIN_VALUE 65536 NUMERIC_MAX_VALUE 67108864 -@@ -1633,10 +1633,10 @@ +@@ -1608,10 +1608,10 @@ SESSION_VALUE NULL DEFAULT_VALUE 30 VARIABLE_SCOPE GLOBAL @@ -354,7 +369,7 @@ NUMERIC_BLOCK_SIZE 0 ENUM_VALUE_LIST NULL READ_ONLY NO -@@ -1729,7 +1729,7 @@ +@@ -1692,7 +1692,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 0 VARIABLE_SCOPE GLOBAL diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result index 3b95064abd32b..3c355050fd6f5 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result +++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result @@ -5,6 +5,7 @@ variable_name not in ( 'innodb_numa_interleave', # only available WITH_NUMA 'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_use_native_aio', # default value 
depends on OS +'innodb_buffer_pool_size_max', # default value depends on OS 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing order by variable_name; VARIABLE_NAME INNODB_ADAPTIVE_FLUSHING @@ -84,7 +85,7 @@ SESSION_VALUE NULL DEFAULT_VALUE 134217728 VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_COMMENT Size of a single memory chunk for resizing buffer pool. Online buffer pool resizing happens at this granularity. 0 means disable resizing buffer pool. +VARIABLE_COMMENT Deprecated parameter with no effect NUMERIC_MIN_VALUE 1048576 NUMERIC_MAX_VALUE 9223372036854775807 NUMERIC_BLOCK_SIZE 1048576 @@ -194,7 +195,7 @@ VARIABLE_SCOPE GLOBAL VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_COMMENT The size of the memory buffer InnoDB uses to cache data and indexes of its tables. NUMERIC_MIN_VALUE 2097152 -NUMERIC_MAX_VALUE 9223372036854775807 +NUMERIC_MAX_VALUE 18446744073701163008 NUMERIC_BLOCK_SIZE 1048576 ENUM_VALUE_LIST NULL READ_ONLY NO diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt deleted file mode 100644 index aa536bf0070f9..0000000000000 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic-master.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-buffer-pool-chunk-size=2M diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt new file mode 100644 index 0000000000000..373ccf8732e17 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.opt @@ -0,0 +1 @@ +--innodb-buffer-pool-size-max=8m diff --git a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test index f37e3e2e0fde7..1807ffaada1c0 100644 --- a/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test +++ b/mysql-test/suite/sys_vars/t/innodb_buffer_pool_size_basic.test @@ -24,35 
+24,19 @@ --source include/have_innodb.inc -let $wait_condition = - SELECT SUBSTR(variable_value, 1, 34) = 'Completed resizing buffer pool at ' - FROM information_schema.global_status - WHERE LOWER(variable_name) = 'innodb_buffer_pool_resize_status'; - SET @start_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size; ---echo '#---------------------BS_STVARS_022_01----------------------#' -#################################################################### -# Displaying default value # -#################################################################### -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - - --echo '#---------------------BS_STVARS_022_02----------------------#' #################################################################### # Check if Value can set # #################################################################### -SET @@GLOBAL.innodb_buffer_pool_size=10485760; ---echo Expected succeeded ---source include/wait_condition.inc - -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - - - +--enable_warnings +SELECT @@GLOBAL.innodb_buffer_pool_size_max; +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +SET GLOBAL innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max + 1048576; +SELECT @@GLOBAL.innodb_buffer_pool_size = @@GLOBAL.innodb_buffer_pool_size_max; +--disable_warnings --echo '#---------------------BS_STVARS_022_03----------------------#' ################################################################# @@ -66,9 +50,6 @@ WHERE VARIABLE_NAME='innodb_buffer_pool_size'; --enable_warnings --echo 1 Expected -SELECT COUNT(@@GLOBAL.innodb_buffer_pool_size); ---echo 1 Expected - --disable_warnings SELECT COUNT(VARIABLE_VALUE) FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES @@ -76,8 +57,6 @@ WHERE VARIABLE_NAME='innodb_buffer_pool_size'; --enable_warnings --echo 1 Expected - - --echo '#---------------------BS_STVARS_022_04----------------------#' 
################################################################################ # Check if accessing variable with and without GLOBAL point to same variable # @@ -111,4 +90,6 @@ SELECT innodb_buffer_pool_size = @@SESSION.innodb_buffer_pool_size; # Restore the original buffer pool size. ---source include/restart_mysqld.inc +SET GLOBAL innodb_buffer_pool_size = @start_buffer_pool_size; + +SELECT @@innodb_buffer_pool_size = @start_buffer_pool_size; diff --git a/mysql-test/suite/sys_vars/t/sysvars_innodb.test b/mysql-test/suite/sys_vars/t/sysvars_innodb.test index 83869fb96b38a..385fe6138771f 100644 --- a/mysql-test/suite/sys_vars/t/sysvars_innodb.test +++ b/mysql-test/suite/sys_vars/t/sysvars_innodb.test @@ -16,5 +16,6 @@ select VARIABLE_NAME, SESSION_VALUE, DEFAULT_VALUE, VARIABLE_SCOPE, VARIABLE_TYP 'innodb_numa_interleave', # only available WITH_NUMA 'innodb_evict_tables_on_commit_debug', # one may want to override this 'innodb_use_native_aio', # default value depends on OS + 'innodb_buffer_pool_size_max', # default value depends on OS 'innodb_buffer_pool_load_pages_abort') # debug build only, and is only for testing order by variable_name; diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt index 515414c553ada..0327637e19906 100644 --- a/mysys/CMakeLists.txt +++ b/mysys/CMakeLists.txt @@ -46,7 +46,8 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c my_default.c my_uuid.c wqueue.c waiting_threads.c ma_dyncol.c ../sql-common/my_time.c my_rdtsc.c psi_noop.c my_atomic_writes.c my_cpu.c my_likely.c my_largepage.c - file_logger.c my_dlerror.c crc32/crc32c.cc) + file_logger.c my_dlerror.c crc32/crc32c.cc + my_virtual_mem.c) IF (WIN32) SET (MYSYS_SOURCES ${MYSYS_SOURCES} diff --git a/mysys/my_largepage.c b/mysys/my_largepage.c index 07859db2fedf6..f238a10c68c63 100644 --- a/mysys/my_largepage.c +++ b/mysys/my_largepage.c @@ -35,17 +35,11 @@ extern int memcntl(caddr_t, size_t, int, caddr_t, int, int); #endif /* __sun__ ... 
*/ #endif /* HAVE_SOLARIS_LARGE_PAGES */ -#if defined(_WIN32) -static size_t my_large_page_size; -#define HAVE_LARGE_PAGES -#elif defined(HAVE_MMAP) -#define HAVE_LARGE_PAGES -#endif -#ifdef HAVE_LARGE_PAGES -static my_bool my_use_large_pages= 0; -#else -#define my_use_large_pages 0 +my_bool my_use_large_pages; + +#ifdef _WIN32 +static size_t my_large_page_size; #endif #if defined(HAVE_GETPAGESIZES) || defined(__linux__) @@ -172,7 +166,7 @@ static void my_get_large_page_sizes(size_t sizes[]) @retval a large page size that is valid on this system or 0 if no large page size possible. */ -#if defined(HAVE_MMAP) && !defined(_WIN32) +#ifndef _WIN32 static size_t my_next_large_page_size(size_t sz, int *start) { DBUG_ENTER("my_next_large_page_size"); @@ -188,11 +182,12 @@ static size_t my_next_large_page_size(size_t sz, int *start) } DBUG_RETURN(0); } -#endif /* defined(MMAP) || !defined(_WIN32) */ +#endif -int my_init_large_pages(my_bool super_large_pages) +int my_init_large_pages(void) { + my_use_large_pages= 1; #ifdef _WIN32 if (!my_obtain_privilege(SE_LOCK_MEMORY_NAME)) { @@ -200,19 +195,15 @@ int my_init_large_pages(my_bool super_large_pages) "Lock Pages in memory access rights required for use with" " large-pages, see https://mariadb.com/kb/en/library/" "mariadb-memory-allocation/#huge-pages", MYF(MY_WME)); + my_use_large_pages= 0; } my_large_page_size= GetLargePageMinimum(); #endif - my_use_large_pages= 1; my_get_large_page_sizes(my_large_page_sizes); -#ifndef HAVE_LARGE_PAGES - my_printf_error(EE_OUTOFMEMORY, "No large page support on this platform", - MYF(MY_WME)); -#endif - #ifdef HAVE_SOLARIS_LARGE_PAGES + extern my_bool opt_super_large_pages; /* tell the kernel that we want to use 4/256MB page for heap storage and also for the stack. We use 4 MByte as default and if the @@ -222,9 +213,15 @@ int my_init_large_pages(my_bool super_large_pages) measured in a number of GBytes. We use as big pages as possible which isn't bigger than the above desired page sizes. 
+ + Note: This refers to some implementations of the SPARC ISA, + where the supported page sizes are + 8KiB, 64KiB, 512KiB, 4MiB, 32MiB, 256MiB, 2GiB, and 16GiB. + On implementations of the AMD64 ISA, the available page sizes + should be 4KiB, 2MiB, and 1GiB. */ int nelem= 0; - size_t max_desired_page_size= (super_large_pages ? 256 : 4) * 1024 * 1024; + size_t max_desired_page_size= opt_super_large_pages ? 256 << 20 : 4 << 20; size_t max_page_size= my_next_large_page_size(max_desired_page_size, &nelem); if (max_page_size > 0) @@ -404,6 +401,86 @@ uchar *my_large_malloc(size_t *size, myf my_flags) DBUG_RETURN(ptr); } +#ifndef _WIN32 +/** + Special large pages allocator, with possibility to commit to allocating + more memory later. + Every implementation returns a zero filled buffer here. +*/ +char *my_large_virtual_alloc(size_t *size) +{ + char *ptr; + DBUG_ENTER("my_large_virtual_alloc"); + + if (my_use_large_pages) + { + size_t large_page_size; + int page_i= 0; + + while ((large_page_size= my_next_large_page_size(*size, &page_i)) != 0) + { + int mapflag= MAP_PRIVATE | +# ifdef MAP_POPULATE + MAP_POPULATE | +# endif +# if defined MAP_HUGETLB /* linux 2.6.32 */ + MAP_HUGETLB | +# if defined MAP_HUGE_SHIFT /* Linux-3.8+ */ + my_bit_log2_size_t(large_page_size) << MAP_HUGE_SHIFT | +# else +# warning "No explicit large page (HUGETLB pages) support in Linux < 3.8" +# endif +# elif defined MAP_ALIGNED + MAP_ALIGNED(my_bit_log2_size_t(large_page_size)) | +# if defined MAP_ALIGNED_SUPER + MAP_ALIGNED_SUPER | +# endif +# endif + OS_MAP_ANON; + + size_t aligned_size= MY_ALIGN(*size, (size_t) large_page_size); + ptr= mmap(NULL, aligned_size, PROT_READ | PROT_WRITE, mapflag, -1, 0); + if (ptr == (void*) -1) + { + ptr= NULL; + /* try next smaller memory size */ + if (errno == ENOMEM) + continue; + + /* other errors are more serious */ + break; + } + else /* success */ + { + /* + we do need to record the adjustment so that munmap gets called with + the right size. 
This is only the case for HUGETLB pages. + */ + *size= aligned_size; + DBUG_RETURN(ptr); + } + } + + my_use_large_pages= FALSE; + } + +# ifdef _AIX + /* On IBM AIX, my_virtual_mem_commit() relies on mprotect(2) rather than + a subsequent mmap(2) with MAP_FIXED. */ + ptr= mmap(NULL, *size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | OS_MAP_ANON, -1, 0); +# else + ptr= mmap(NULL, *size, PROT_NONE, MAP_PRIVATE | OS_MAP_ANON, -1, 0); +# endif + if (ptr == MAP_FAILED) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), *size); + ptr= NULL; + } + + DBUG_RETURN(ptr); +} +#endif /** General large pages deallocator. @@ -460,7 +537,7 @@ void my_large_free(void *ptr, size_t size) #endif /* memory_sanitizer */ #else my_free_lock(ptr); -#endif /* HAVE_MMMAP */ +#endif /* HAVE_MMAP */ DBUG_VOID_RETURN; } diff --git a/mysys/my_virtual_mem.c b/mysys/my_virtual_mem.c new file mode 100644 index 0000000000000..7157dd7ae0611 --- /dev/null +++ b/mysys/my_virtual_mem.c @@ -0,0 +1,182 @@ +/* Copyright (c) 2025, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#include +#include +#include +#include +#ifdef _AIX +# include +#endif + +/* + Functionality for handling virtual memory + + - reserve range, + - commit memory (within reserved range) + - decommit previously committed memory + - release range + + Not every OS has a "reserve" functionality, i.e. it is not always + possible to reserve memory larger than swap or RAM for example. + + We try to respect use_large_pages setting, on Windows and Linux +*/ +#ifdef _WIN32 +char *my_virtual_mem_reserve(size_t *size) +{ + DWORD flags= my_use_large_pages + ? MEM_LARGE_PAGES | MEM_RESERVE | MEM_COMMIT + : MEM_RESERVE; + char *ptr= VirtualAlloc(NULL, *size, flags, PAGE_READWRITE); + if (!ptr && (flags & MEM_LARGE_PAGES)) + { + /* Try without large pages */ + ptr= VirtualAlloc(NULL, *size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE); + if (!ptr) + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), *size); + } + return ptr; +} +#endif + +#if defined _WIN32 && !defined DBUG_OFF +static my_bool is_memory_committed(char *ptr, size_t size) +{ + MEMORY_BASIC_INFORMATION mbi; + if (VirtualQuery(ptr, &mbi, sizeof mbi) == 0) + DBUG_ASSERT(0); + return !!(mbi.State & MEM_COMMIT); +} +#endif + +char *my_virtual_mem_commit(char *ptr, size_t size) +{ + DBUG_ASSERT(ptr); +#ifdef _WIN32 + if (my_use_large_pages) + { + DBUG_ASSERT(is_memory_committed(ptr, size)); + } + else + { + void *p= VirtualAlloc(ptr, size, MEM_COMMIT, PAGE_READWRITE); + DBUG_ASSERT(p == ptr); + if (!p) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size); + return NULL; + } + } +#else + if (my_use_large_pages) + /* my_large_virtual_alloc() already created a read/write mapping. */; + else + { +# ifdef _AIX + /* + MAP_FIXED does not work on IBM AIX in the way it works elsewhere.
+ Apparently, it is not possible to mmap(2) a range that is already in use, + at least not by default. + + mprotect(2) is the fallback, it can't communicate out-of-memory + conditions, but it looks like overcommitting is not possible on + AIX anyway. + */ + if (mprotect(ptr, size, PROT_READ | PROT_WRITE)) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size); + return NULL; + } +# else + void *p= 0; + const int flags= + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; + p= mmap(ptr, size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (p == MAP_FAILED) + { + my_error(EE_OUTOFMEMORY, MYF(ME_BELL + ME_ERROR_LOG), size); + return NULL; + } + DBUG_ASSERT(p == ptr); +# if defined MADV_FREE_REUSABLE && defined MADV_FREE_REUSE /* Apple macOS */ + madvise(ptr, size, MADV_FREE_REUSE); /* cancel MADV_FREE_REUSABLE */ +# endif +# endif + } +#endif + update_malloc_size(size, 0); + return ptr; +} + +void my_virtual_mem_decommit(char *ptr, size_t size) +{ +#ifdef _WIN32 + DBUG_ASSERT(is_memory_committed(ptr, size)); + if (!my_use_large_pages) + { + if (!VirtualFree(ptr, size, MEM_DECOMMIT)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, + GetLastError()); + DBUG_ASSERT(0); + } + } +#else +# ifdef _AIX + disclaim(ptr, size, DISCLAIM_ZEROMEM); +# elif defined __linux__ || defined __osf__ + madvise(ptr, size, MADV_DONTNEED); /* OSF/1, Linux mimicking AIX disclaim() */ +# elif defined MADV_FREE_REUSABLE && defined MADV_FREE_REUSE + /* Mac OS X 10.9; undocumented in Apple macOS */ + madvise(ptr, size, MADV_FREE_REUSABLE); /* macOS mimicking AIX disclaim() */ +# elif defined MADV_PURGE /* Illumos */ + madvise(ptr, size, MADV_PURGE); /* Illumos mimicking AIX disclaim() */ +# elif defined MADV_FREE + /* FreeBSD, NetBSD, OpenBSD, Dragonfly BSD, OpenSolaris, Apple macOS */ + madvise(ptr, size, MADV_FREE); /* allow lazy zeroing out */ +# elif defined MADV_DONTNEED +# warning "It is unclear if madvise(MADV_DONTNEED) works as intended" + madvise(ptr, size,
MADV_DONTNEED); +# else +# warning "Do not know how to decommit memory" +# endif + if (mprotect(ptr, size, PROT_NONE)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno); + DBUG_ASSERT(0); + } +#endif + update_malloc_size(-(longlong) size, 0); +} + +void my_virtual_mem_release(char *ptr, size_t size) +{ +#ifdef _WIN32 + DBUG_ASSERT(my_use_large_pages || !is_memory_committed(ptr, size)); + if (!VirtualFree(ptr, 0, MEM_RELEASE)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, + GetLastError()); + DBUG_ASSERT(0); + } +#else + if (munmap(ptr, size)) + { + my_error(EE_BADMEMORYRELEASE, MYF(ME_ERROR_LOG_ONLY), ptr, size, errno); + DBUG_ASSERT(0); + } +#endif +} diff --git a/sql/mysql_install_db.cc b/sql/mysql_install_db.cc index c1d71aa855beb..9194f7e06ce8b 100644 --- a/sql/mysql_install_db.cc +++ b/sql/mysql_install_db.cc @@ -334,7 +334,7 @@ static char *init_bootstrap_command_line(char *cmdline, size_t size) " --bootstrap" " --datadir=." " --tmpdir=." - " --loose-innodb-buffer-pool-size=20M" + " --loose-innodb-buffer-pool-size=21M" "\"" , mysqld_path, opt_verbose_bootstrap ? 
"--console" : ""); return cmdline; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 5707ce9034994..23aae2b188770 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -413,7 +413,9 @@ my_bool opt_require_secure_transport= 0; char* opt_secure_file_priv; my_bool lower_case_file_system= 0; my_bool opt_large_pages= 0; +#ifdef HAVE_SOLARIS_LARGE_PAGES my_bool opt_super_large_pages= 0; +#endif my_bool opt_myisam_use_mmap= 0; uint opt_large_page_size= 0; #if defined(ENABLED_DEBUG_SYNC) @@ -4007,7 +4009,7 @@ static int init_common_variables() if (opt_large_pages) { DBUG_PRINT("info", ("Large page set")); - if (my_init_large_pages(opt_super_large_pages)) + if (my_init_large_pages()) { return 1; } @@ -7763,7 +7765,9 @@ static int mysql_init_variables(void) bzero((char*) &global_status_var, offsetof(STATUS_VAR, last_cleared_system_status_var)); opt_large_pages= 0; +#ifdef HAVE_SOLARIS_LARGE_PAGES opt_super_large_pages= 0; +#endif #if defined(ENABLED_DEBUG_SYNC) opt_debug_sync_timeout= 0; #endif /* defined(ENABLED_DEBUG_SYNC) */ diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index ecec03315732f..e2570394ff139 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -195,7 +195,7 @@ static void btr_search_disable_ref_count(dict_table_t *table) } /** Lazily free detached metadata when removing the last reference. */ -ATTRIBUTE_COLD static void btr_search_lazy_free(dict_index_t *index) +ATTRIBUTE_COLD void btr_search_lazy_free(dict_index_t *index) { ut_ad(index->freed()); dict_table_t *table= index->table; @@ -217,8 +217,7 @@ ATTRIBUTE_COLD static void btr_search_lazy_free(dict_index_t *index) } } -/** Disable the adaptive hash search system and empty the index. 
*/ -void btr_search_disable() +ATTRIBUTE_COLD bool btr_search_disable() { dict_table_t* table; @@ -229,7 +228,7 @@ void btr_search_disable() if (!btr_search_enabled) { dict_sys.unfreeze(); btr_search_x_unlock_all(); - return; + return false; } btr_search_enabled = false; @@ -257,23 +256,25 @@ void btr_search_disable() btr_search_sys.clear(); btr_search_x_unlock_all(); + + return true; } /** Enable the adaptive hash search system. @param resize whether buf_pool_t::resize() is the caller */ -void btr_search_enable(bool resize) +ATTRIBUTE_COLD void btr_search_enable(bool resize) { if (!resize) { mysql_mutex_lock(&buf_pool.mutex); - bool changed = srv_buf_pool_old_size != srv_buf_pool_size; + const auto is_shrinking = buf_pool.is_shrinking(); mysql_mutex_unlock(&buf_pool.mutex); - if (changed) { + if (is_shrinking) { return; } } btr_search_x_lock_all(); - ulint hash_size = buf_pool_get_curr_size() / sizeof(void *) / 64; + ulint hash_size = buf_pool.curr_pool_size() / sizeof(void *) / 64; if (btr_search_sys.parts[0].heap) { ut_ad(btr_search_enabled); @@ -935,88 +936,6 @@ btr_search_failure(btr_search_t* info, btr_cur_t* cursor) info->last_hash_succ = FALSE; } -/** Clear the adaptive hash index on all pages in the buffer pool. 
*/ -inline void buf_pool_t::clear_hash_index() noexcept -{ - ut_ad(!resizing); - ut_ad(!btr_search_enabled); - - std::set garbage; - - for (chunk_t *chunk= chunks + n_chunks; chunk-- != chunks; ) - { - for (buf_block_t *block= chunk->blocks, * const end= block + chunk->size; - block != end; block++) - { - dict_index_t *index= block->index; - assert_block_ahi_valid(block); - - /* We can clear block->index and block->n_pointers when - holding all AHI latches exclusively; see the comments in buf0buf.h */ - - if (!index) - { -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - ut_a(!block->n_pointers); -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - continue; - } - - ut_d(const auto s= block->page.state()); - /* Another thread may have set the state to - REMOVE_HASH in buf_LRU_block_remove_hashed(). - - The state change in buf_pool_t::realloc() is not observable - here, because in that case we would have !block->index. - - In the end, the entire adaptive hash index will be removed. */ - ut_ad(s >= buf_page_t::UNFIXED || s == buf_page_t::REMOVE_HASH); -# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG - block->n_pointers= 0; -# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - if (index->freed()) - garbage.insert(index); - block->index= nullptr; - } - } - - for (dict_index_t *index : garbage) - btr_search_lazy_free(index); -} - -/** Get a buffer block from an adaptive hash index pointer. -This function does not return if the block is not identified. -@param ptr pointer to within a page frame -@return pointer to block, never NULL */ -inline buf_block_t* buf_pool_t::block_from_ahi(const byte *ptr) const noexcept -{ - chunk_t::map *chunk_map = chunk_t::map_ref; - ut_ad(chunk_t::map_ref == chunk_t::map_reg); - ut_ad(!resizing); - - chunk_t::map::const_iterator it= chunk_map->upper_bound(ptr); - ut_a(it != chunk_map->begin()); - - chunk_t *chunk= it == chunk_map->end() - ? 
chunk_map->rbegin()->second - : (--it)->second; - - const size_t offs= size_t(ptr - chunk->blocks->page.frame) >> - srv_page_size_shift; - ut_a(offs < chunk->size); - - buf_block_t *block= &chunk->blocks[offs]; - /* buf_pool_t::chunk_t::init() invokes buf_block_init() so that - block[n].frame == block->page.frame + n * srv_page_size. Check it. */ - ut_ad(block->page.frame == page_align(ptr)); - /* Read the state of the block without holding hash_lock. - A state transition to REMOVE_HASH is possible during - this execution. */ - ut_ad(block->page.state() >= buf_page_t::REMOVE_HASH); - - return block; -} - /** Tries to guess the right search position based on the hash search info of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, and the function returns TRUE, then cursor->up_match and cursor->low_match @@ -1099,7 +1018,8 @@ btr_search_guess_on_hash( return false; } - buf_block_t* block = buf_pool.block_from_ahi(rec); + buf_block_t* block = buf_pool.block_from(rec); + ut_ad(block->page.frame == page_align(rec)); buf_pool_t::hash_chain& chain = buf_pool.page_hash.cell_get( block->page.id().fold()); @@ -2192,7 +2112,7 @@ static bool btr_search_hash_table_validate(THD *thd, ulint hash_table_id) for (; node != NULL; node = node->next) { const buf_block_t* block - = buf_pool.block_from_ahi((byte*) node->data); + = buf_pool.block_from(node->data); index_id_t page_index_id; if (UNIV_LIKELY(block->page.in_file())) { diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index 2c6b652d18f47..3a4e5191a27d9 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -162,6 +162,20 @@ buf_buddy_get( } #ifdef UNIV_DEBUG +const buf_block_t *buf_pool_t::contains_zip(const void *data, size_t shift) + const noexcept +{ + const size_t d= size_t(data) >> shift; + + for (size_t i= 0; i < n_blocks; i++) + { + const buf_block_t *block= get_nth_page(i); + if (size_t(block->page.zip.data) >> shift == d) + 
return block; + } + return nullptr; +} + /** Validate a given zip_free list. */ struct CheckZipFree { CheckZipFree(ulint i) : m_i(i) {} @@ -257,13 +271,10 @@ buf_buddy_is_free( /** Add a block to the head of the appropriate buddy free list. @param[in,out] buf block to be freed @param[in] i index of buf_pool.zip_free[] */ -UNIV_INLINE -void -buf_buddy_add_to_free(buf_buddy_free_t* buf, ulint i) +static void buf_buddy_add_to_free(buf_buddy_free_t *buf, ulint i) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(buf_pool.zip_free[i].start != buf); - buf_buddy_stamp_free(buf, i); UT_LIST_ADD_FIRST(buf_pool.zip_free[i], buf); ut_d(buf_buddy_list_validate(i)); @@ -272,9 +283,7 @@ buf_buddy_add_to_free(buf_buddy_free_t* buf, ulint i) /** Remove a block from the appropriate buddy free list. @param[in,out] buf block to be freed @param[in] i index of buf_pool.zip_free[] */ -UNIV_INLINE -void -buf_buddy_remove_from_free(buf_buddy_free_t* buf, ulint i) +static void buf_buddy_remove_from_free(buf_buddy_free_t *buf, ulint i) { mysql_mutex_assert_owner(&buf_pool.mutex); ut_ad(buf_buddy_check_free(buf, i)); @@ -298,13 +307,10 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) buf = UT_LIST_GET_FIRST(buf_pool.zip_free[i]); - if (buf_pool.is_shrinking() - && UT_LIST_GET_LEN(buf_pool.withdraw) - < buf_pool.withdraw_target) { - + if (size_t size = buf_pool.shrinking_size()) { while (buf != NULL && buf_pool.will_be_withdrawn( - reinterpret_cast(buf))) { + reinterpret_cast(buf), size)) { /* This should be withdrawn, not to be allocated */ buf = UT_LIST_GET_NEXT(list, buf); } @@ -312,6 +318,7 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) if (buf) { buf_buddy_remove_from_free(buf, i); + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); } else if (i + 1 < BUF_BUDDY_SIZES) { /* Attempt to split. 
*/ buf = buf_buddy_alloc_zip(i + 1); @@ -321,7 +328,6 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) reinterpret_cast( reinterpret_cast(buf) + (BUF_BUDDY_LOW << i)); - ut_ad(!buf_pool.contains_zip(buddy)); buf_buddy_add_to_free(buddy, i); } } @@ -340,74 +346,52 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i) return(buf); } +#ifdef UNIV_DEBUG +/** number of blocks allocated to the buddy system */ +static size_t buf_buddy_n_frames; +#endif + /** Deallocate a buffer frame of srv_page_size. @param buf buffer frame to deallocate */ static void buf_buddy_block_free(void *buf) noexcept { mysql_mutex_assert_owner(&buf_pool.mutex); - ut_a(!ut_align_offset(buf, srv_page_size)); - - const ulint fold= BUF_POOL_ZIP_FOLD_PTR(buf); - buf_page_t **prev= buf_pool.zip_hash.cell_get(fold)-> - search(&buf_page_t::hash, [buf](const buf_page_t *b) - { - ut_ad(b->in_zip_hash); - ut_ad(b->state() == buf_page_t::MEMORY); - return b->frame == buf; - }); - - buf_page_t *bpage= *prev; - ut_a(bpage); - ut_a(bpage->frame == buf); - ut_d(bpage->in_zip_hash= false); - *prev= bpage->hash; - bpage->hash= nullptr; - + buf_block_t *block= buf_pool.block_from(buf); + ut_ad(block->page.state() == buf_page_t::MEMORY); + ut_ad(block->page.frame == buf); + ut_ad(!buf_pool.contains_zip(buf, srv_page_size_shift)); ut_d(memset(buf, 0, srv_page_size)); MEM_UNDEFINED(buf, srv_page_size); - - buf_LRU_block_free_non_file_page(reinterpret_cast(bpage)); - ut_ad(buf_pool.buddy_n_frames > 0); - ut_d(buf_pool.buddy_n_frames--); + buf_LRU_block_free_non_file_page(block); + ut_ad(buf_buddy_n_frames > 0); + ut_d(buf_buddy_n_frames--); } /** Allocate a buffer block to the buddy allocator. 
@param block buffer block to register */ static void buf_buddy_block_register(buf_block_t *block) noexcept { - const ulint fold= BUF_POOL_ZIP_FOLD(block); + ut_ad(buf_pool.is_uncompressed_current(block)); ut_ad(block->page.state() == buf_page_t::MEMORY); - - ut_a(block->page.frame); - ut_a(!ut_align_offset(block->page.frame, srv_page_size)); - - ut_ad(!block->page.in_zip_hash); - ut_d(block->page.in_zip_hash= true); - buf_pool.zip_hash.cell_get(fold)->append(block->page, &buf_page_t::hash); - ut_d(buf_pool.buddy_n_frames++); + ut_d(buf_buddy_n_frames++); } /** Allocate a block from a bigger object. @param[in] buf a block that is free to use @param[in] i index of buf_pool.zip_free[] -@param[in] j size of buf as an index of buf_pool.zip_free[] @return allocated block */ -static -void* -buf_buddy_alloc_from(void* buf, ulint i, ulint j) +static void *buf_buddy_alloc_from(void *buf, ulint i) { - ulint offs = BUF_BUDDY_LOW << j; - ut_ad(j <= BUF_BUDDY_SIZES); ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - ut_ad(j >= i); - ut_ad(!ut_align_offset(buf, offs)); + ut_ad(i <= BUF_BUDDY_SIZES); + ut_ad(!ut_align_offset(buf, srv_page_size)); + ut_ad(!buf_pool.contains_zip(buf, srv_page_size_shift)); /* Add the unused parts of the block to the free lists. */ - while (j > i) { + for (ulint j = BUF_BUDDY_SIZES, offs = srv_page_size; j-- > i; ) { buf_buddy_free_t* zip_buf; offs >>= 1; - j--; zip_buf = reinterpret_cast( reinterpret_cast(buf) + offs); @@ -422,7 +406,7 @@ buf_buddy_alloc_from(void* buf, ulint i, ulint j) @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -byte *buf_buddy_alloc_low(ulint i, bool *lru) +byte *buf_buddy_alloc_low(ulint i, bool *lru) noexcept { buf_block_t* block; @@ -439,7 +423,7 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru) } /* Try allocating from the buf_pool.free list. 
*/ - block = buf_LRU_get_free_only(); + block = buf_pool.allocate(); if (block) { goto alloc_big; @@ -455,21 +439,21 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru) buf_buddy_block_register(block); block = reinterpret_cast( - buf_buddy_alloc_from(block->page.frame, i, BUF_BUDDY_SIZES)); + buf_buddy_alloc_from(block->page.frame, i)); func_exit: buf_pool.buddy_stat[i].used++; return reinterpret_cast(block); } -/** Try to relocate a block. The caller must hold zip_free_mutex, and this -function will release and lock it again. +/** Try to relocate a block. @param[in] src block to relocate @param[in] dst free block to relocated to @param[in] i index of buf_pool.zip_free[] @param[in] force true if we must relocated always @return true if relocated */ -static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) +static bool buf_buddy_relocate(void *src, void *dst, ulint i, bool force) + noexcept { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; @@ -575,7 +559,7 @@ static bool buf_buddy_relocate(void* src, void* dst, ulint i, bool force) @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -void buf_buddy_free_low(void* buf, ulint i) +void buf_buddy_free_low(void* buf, ulint i) noexcept { buf_buddy_free_t* buddy; @@ -595,13 +579,12 @@ void buf_buddy_free_low(void* buf, ulint i) ut_ad(i < BUF_BUDDY_SIZES); ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i)); - ut_ad(!buf_pool.contains_zip(buf)); + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); /* Do not recombine blocks if there are few free blocks. 
We may waste up to 15360*max_len bytes to free blocks (1024 + 2048 + 4096 + 8192 = 15360) */ - if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16 - && !buf_pool.is_shrinking()) { + if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16) { goto func_exit; } @@ -615,10 +598,9 @@ void buf_buddy_free_low(void* buf, ulint i) /* The buddy is free: recombine */ buf_buddy_remove_from_free(buddy, i); buddy_is_free: - ut_ad(!buf_pool.contains_zip(buddy)); i++; buf = ut_align_down(buf, BUF_BUDDY_LOW << i); - + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + i)); goto recombine; case BUF_BUDDY_STATE_USED: @@ -655,107 +637,119 @@ void buf_buddy_free_low(void* buf, ulint i) buf_buddy_add_to_free(reinterpret_cast(buf), i); } -/** Try to reallocate a block. -@param[in] buf buf_pool block to be reallocated -@param[in] size block size, up to srv_page_size -@return whether the reallocation succeeded */ -bool -buf_buddy_realloc(void* buf, ulint size) +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::shrink(). +@param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page +@param block uncompressed block for storage +@return block +@retval nullptr if the block was consumed */ +ATTRIBUTE_COLD +buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept { - buf_block_t* block = NULL; - ulint i = buf_buddy_get_slot(size); + ut_ad(bpage->zip.data); - mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(i <= BUF_BUDDY_SIZES); - ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); - - if (i < BUF_BUDDY_SIZES) { - /* Try to allocate from the buddy system. */ - block = reinterpret_cast(buf_buddy_alloc_zip(i)); - } + void *dst= nullptr; + ulint size= page_zip_get_size(&bpage->zip); + ulint i= buf_buddy_get_slot(size); - if (block == NULL) { - /* Try allocating from the buf_pool.free list. 
*/ - block = buf_LRU_get_free_only(); - - if (block == NULL) { - return(false); /* free_list was not enough */ - } - - buf_buddy_block_register(block); - - block = reinterpret_cast( - buf_buddy_alloc_from( - block->page.frame, i, BUF_BUDDY_SIZES)); - } - - buf_pool.buddy_stat[i].used++; - - /* Try to relocate the buddy of buf to the free block. */ - if (buf_buddy_relocate(buf, block, i, true)) { - /* succeeded */ - buf_buddy_free_low(buf, i); - } else { - /* failed */ - buf_buddy_free_low(block, i); - } - - return(true); /* free_list was enough */ + ut_ad(bpage->can_relocate()); + ut_ad(i <= BUF_BUDDY_SIZES); + ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN)); + + if (UNIV_LIKELY(i < BUF_BUDDY_SIZES)) + dst= buf_buddy_alloc_zip(i); + + if (!dst) + { + buf_buddy_block_register(block); + dst= buf_buddy_alloc_from(block->page.frame, i); + ut_ad(dst); + block= nullptr; + } + + void *src= bpage->zip.data; + memcpy_aligned(dst, src, size); + bpage->zip.data= static_cast(dst); + buf_pool.buddy_stat[i].relocated++; + + while (i < BUF_BUDDY_SIZES) + { + MEM_UNDEFINED(src, BUF_BUDDY_LOW << i); + /* Try to combine adjacent blocks. */ + buf_buddy_free_t *buddy= reinterpret_cast + (buf_buddy_get(static_cast(src), BUF_BUDDY_LOW << i)); + + if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE) + { + ut_ad(!buf_pool.contains_zip(src, BUF_BUDDY_LOW_SHIFT + i)); + buf_buddy_add_to_free(static_cast(src), i); + return block; + } + + /* The buddy is free: recombine */ + buf_buddy_remove_from_free(buddy, i); + i++; + src= ut_align_down(src, BUF_BUDDY_LOW << i); + } + + buf_buddy_block_free(src); + return block; } -/** Combine all pairs of free buddies. */ -void buf_buddy_condense_free() +/** Combine all pairs of free buddies. 
+@param size the target innodb_buffer_pool_size */ +ATTRIBUTE_COLD void buf_buddy_condense_free(size_t size) noexcept { - mysql_mutex_assert_owner(&buf_pool.mutex); - ut_ad(buf_pool.is_shrinking()); + ut_ad(size); + ut_ad(size == buf_pool.shrinking_size()); - for (ulint i = 0; i < UT_ARR_SIZE(buf_pool.zip_free); ++i) { - buf_buddy_free_t* buf = - UT_LIST_GET_FIRST(buf_pool.zip_free[i]); + for (ulint i= 0; i < array_elements(buf_pool.zip_free); i++) + { + buf_buddy_free_t *buf= UT_LIST_GET_FIRST(buf_pool.zip_free[i]); - /* seek to withdraw target */ - while (buf != NULL - && !buf_pool.will_be_withdrawn( - reinterpret_cast(buf))) { - buf = UT_LIST_GET_NEXT(list, buf); - } - - while (buf != NULL) { - buf_buddy_free_t* next = - UT_LIST_GET_NEXT(list, buf); - - buf_buddy_free_t* buddy = - reinterpret_cast( - buf_buddy_get( - reinterpret_cast(buf), - BUF_BUDDY_LOW << i)); - - /* seek to the next withdraw target */ - while (true) { - while (next != NULL - && !buf_pool.will_be_withdrawn( - reinterpret_cast(next))) { - next = UT_LIST_GET_NEXT(list, next); - } - - if (buddy != next) { - break; - } - - next = UT_LIST_GET_NEXT(list, next); - } - - if (buf_buddy_is_free(buddy, i) - == BUF_BUDDY_STATE_FREE) { - /* Both buf and buddy are free. - Try to combine them. 
*/ - buf_buddy_remove_from_free(buf, i); - buf_pool.buddy_stat[i].used++; - - buf_buddy_free_low(buf, i); - } + /* seek to withdraw target */ + while (buf && + !buf_pool.will_be_withdrawn(reinterpret_cast(buf), size)) + buf= UT_LIST_GET_NEXT(list, buf); - buf = next; - } - } + for (buf_buddy_free_t *next= buf; buf; buf= next) + { + buf_buddy_free_t *buddy= reinterpret_cast + (buf_buddy_get(reinterpret_cast(buf), BUF_BUDDY_LOW << i)); + + /* seek to the next withdraw target */ + do + { + while ((next= UT_LIST_GET_NEXT(list, next)) && + !buf_pool.will_be_withdrawn(reinterpret_cast(next), + size)) {} + } + while (buddy == next); + + if (buf_buddy_is_free(buddy, i) != BUF_BUDDY_STATE_FREE) + continue; + + buf_buddy_remove_from_free(buf, i); + ulint j= i; + recombine: + buf_buddy_remove_from_free(buddy, j); + j++; + buf= static_cast + (ut_align_down(buf, BUF_BUDDY_LOW << j)); + MEM_UNDEFINED(buf, BUF_BUDDY_LOW << j); + + if (j == BUF_BUDDY_SIZES) + { + buf_buddy_block_free(buf); + continue; + } + + buddy= reinterpret_cast + (buf_buddy_get(reinterpret_cast(buf), BUF_BUDDY_LOW << j)); + if (buf_buddy_is_free(buddy, j) == BUF_BUDDY_STATE_FREE) + goto recombine; + + ut_ad(!buf_pool.contains_zip(buf, BUF_BUDDY_LOW_SHIFT + j)); + buf_buddy_add_to_free(buf, j); + } + } } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 67bac28ce3f93..b68e4726d29d0 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -47,8 +47,6 @@ Created 11/5/1995 Heikki Tuuri #include "lock0lock.h" #include "btr0sea.h" #include "ibuf0ibuf.h" -#include "trx0undo.h" -#include "trx0purge.h" #include "log0log.h" #include "dict0stats_bg.h" #include "srv0srv.h" @@ -64,6 +62,7 @@ Created 11/5/1995 Heikki Tuuri #include #include #include "log.h" +#include "my_virtual_mem.h" using st_::span; @@ -277,6 +276,56 @@ the read requests for the whole area. 
*/ #ifndef UNIV_INNOCHECKSUM +/** Compute the number of page frames needed for buf_block_t, +per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr uint8_t first_page(size_t ps) +{ + return uint8_t(innodb_buffer_pool_extent_size / ps - + innodb_buffer_pool_extent_size / (ps + sizeof(buf_block_t))); +} + +/** Compute the number of bytes needed for buf_block_t, +per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr size_t first_frame(size_t ps) +{ + return first_page(ps) * ps; +} + +/** Compute the number of pages per innodb_buffer_pool_extent_size. +@param ps innodb_page_size +@return number of buf_block_t frames per extent */ +static constexpr uint16_t pages(size_t ps) +{ + return uint16_t(innodb_buffer_pool_extent_size / ps - first_page(ps)); +} + +/** The byte offset of the first page frame in a buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr size_t first_frame_in_extent[]= +{ + first_frame(4096), first_frame(8192), first_frame(16384), + first_frame(32768), first_frame(65536) +}; + +/** The position offset of the first page frame in a buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr uint8_t first_page_in_extent[]= +{ + first_page(4096), first_page(8192), first_page(16384), + first_page(32768), first_page(65536) +}; + +/** Number of pages per buffer pool extent +of innodb_buffer_pool_extent_size bytes */ +static constexpr size_t pages_in_extent[]= +{ + pages(4096), pages(8192), pages(16384), pages(32768), pages(65536) +}; + # ifdef SUX_LOCK_GENERIC void page_hash_latch::read_lock_wait() noexcept { @@ -326,8 +375,6 @@ const byte *field_ref_zero; /** The InnoDB buffer pool */ buf_pool_t buf_pool; -buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_reg; -buf_pool_t::chunk_t::map *buf_pool_t::chunk_t::map_ref; #ifdef UNIV_DEBUG /** This is used to 
insert validation operations in execution @@ -775,49 +822,38 @@ buf_page_is_corrupted( #ifndef UNIV_INNOCHECKSUM #if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) -/** Enable buffers to be dumped to core files +/** Enable buffers to be dumped to core files. -A convience function, not called anyhwere directly however +A convenience function, not called anyhwere directly however it is left available for gdb or any debugger to call in the event that you want all of the memory to be dumped to a core file. -Returns number of errors found in madvise calls. */ +@return number of errors found in madvise() calls */ MY_ATTRIBUTE((used)) -int -buf_madvise_do_dump() +int buf_pool_t::madvise_do_dump() noexcept { int ret= 0; /* mirrors allocation in log_t::create() */ if (log_sys.buf) { - ret += madvise(log_sys.buf, - srv_log_buffer_size, - MADV_DODUMP); - ret += madvise(log_sys.flush_buf, - srv_log_buffer_size, + ret += madvise(log_sys.buf, srv_log_buffer_size, MADV_DODUMP); + ret += madvise(log_sys.flush_buf, srv_log_buffer_size, MADV_DODUMP); } + /* mirrors recv_sys_t::create() */ - if (recv_sys.buf) - { + if (recv_sys.buf) { ret+= madvise(recv_sys.buf, recv_sys.len, MADV_DODUMP); } - mysql_mutex_lock(&buf_pool.mutex); - auto chunk = buf_pool.chunks; - - for (ulint n = buf_pool.n_chunks; n--; chunk++) { - ret+= madvise(chunk->mem, chunk->mem_size(), MADV_DODUMP); - } - - mysql_mutex_unlock(&buf_pool.mutex); + ret+= madvise(buf_pool.memory, buf_pool.size_in_bytes, MADV_DODUMP); return ret; } #endif #ifndef UNIV_DEBUG -static inline byte hex_to_ascii(byte hex_digit) +static inline byte hex_to_ascii(byte hex_digit) noexcept { const int offset= hex_digit <= 9 ? '0' : 'a' - 10; return byte(hex_digit + offset); @@ -853,163 +889,80 @@ void buf_page_print(const byte *read_buf, ulint zip_size) noexcept #endif } -/** Initialize a buffer page descriptor. 
-@param[in,out] block buffer page descriptor -@param[in] frame buffer page frame */ -static -void -buf_block_init(buf_block_t* block, byte* frame) +IF_DBUG(,inline) byte *buf_block_t::frame_address() const noexcept { - /* This function should only be executed at database startup or by - buf_pool.resize(). Either way, adaptive hash index must not exist. */ - assert_block_ahi_empty_on_init(block); - - block->page.frame = frame; - - MEM_MAKE_DEFINED(&block->modify_clock, sizeof block->modify_clock); - ut_ad(!block->modify_clock); - MEM_MAKE_DEFINED(&block->page.lock, sizeof block->page.lock); - block->page.lock.init(); - block->page.init(buf_page_t::NOT_USED, page_id_t(~0ULL)); -#ifdef BTR_CUR_HASH_ADAPT - MEM_MAKE_DEFINED(&block->index, sizeof block->index); - ut_ad(!block->index); -#endif /* BTR_CUR_HASH_ADAPT */ - ut_d(block->in_unzip_LRU_list = false); - ut_d(block->in_withdraw_list = false); - - page_zip_des_init(&block->page.zip); - - MEM_MAKE_DEFINED(&block->page.hash, sizeof block->page.hash); - ut_ad(!block->page.hash); + static_assert(ut_is_2pow(innodb_buffer_pool_extent_size), ""); + + byte *frame_= reinterpret_cast + ((reinterpret_cast(this) & ~(innodb_buffer_pool_extent_size - 1)) | + first_frame_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]); + ut_ad(reinterpret_cast(this) + sizeof(*this) <= frame_); + frame_+= + (((reinterpret_cast(this) & (innodb_buffer_pool_extent_size - 1)) / + sizeof(*this)) << srv_page_size_shift); + return frame_; } -/** Allocate a chunk of buffer frames. -@param bytes requested size -@return whether the allocation succeeded */ -inline bool buf_pool_t::chunk_t::create(size_t bytes) noexcept +buf_block_t *buf_pool_t::block_from(const void *ptr) noexcept { - DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", return false;); - /* Round down to a multiple of page size, although it already should be. 
*/ - bytes= ut_2pow_round(bytes, srv_page_size); - - mem= buf_pool.allocator.allocate_large_dontdump(bytes, &mem_pfx); - - if (UNIV_UNLIKELY(!mem)) - return false; - - MEM_UNDEFINED(mem, mem_size()); - -#ifdef HAVE_LIBNUMA - if (srv_numa_interleave) - { - struct bitmask *numa_mems_allowed= numa_get_mems_allowed(); - MEM_MAKE_DEFINED(numa_mems_allowed, sizeof *numa_mems_allowed); - if (mbind(mem, mem_size(), MPOL_INTERLEAVE, - numa_mems_allowed->maskp, numa_mems_allowed->size, - MPOL_MF_MOVE)) - { - ib::warn() << "Failed to set NUMA memory policy of" - " buffer pool page frames to MPOL_INTERLEAVE" - " (error: " << strerror(errno) << ")."; - } - numa_bitmask_free(numa_mems_allowed); - } -#endif /* HAVE_LIBNUMA */ - - - /* Allocate the block descriptors from - the start of the memory block. */ - blocks= reinterpret_cast(mem); - - /* Align a pointer to the first frame. Note that when - opt_large_page_size is smaller than srv_page_size, - (with max srv_page_size at 64k don't think any hardware - makes this true), - we may allocate one fewer block than requested. When - it is bigger, we may allocate more blocks than requested. */ - static_assert(sizeof(byte*) == sizeof(ulint), "pointer size"); - - byte *frame= reinterpret_cast((reinterpret_cast(mem) + - srv_page_size - 1) & - ~ulint{srv_page_size - 1}); - size= (mem_pfx.m_size >> srv_page_size_shift) - (frame != mem); - - /* Subtract the space needed for block descriptors. */ - { - ulint s= size; - - while (frame < reinterpret_cast(blocks + s)) - { - frame+= srv_page_size; - s--; - } - - size= s; - } - - /* Init block structs and assign frames for them. Then we assign the - frames to the first blocks (we already mapped the memory above). 
*/ - - buf_block_t *block= blocks; - - for (auto i= size; i--; ) { - buf_block_init(block, frame); - MEM_UNDEFINED(block->page.frame, srv_page_size); - /* Add the block to the free list */ - UT_LIST_ADD_LAST(buf_pool.free, &block->page); - - ut_d(block->page.in_free_list = TRUE); - block++; - frame+= srv_page_size; - } + static_assert(ut_is_2pow(innodb_buffer_pool_extent_size), ""); + ut_ad(static_cast(ptr) >= buf_pool.memory); + + byte *first_block= reinterpret_cast + (reinterpret_cast(ptr) & ~(innodb_buffer_pool_extent_size - 1)); + const size_t first_frame= + first_frame_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + + ut_ad(static_cast(ptr) >= first_block + first_frame); + return reinterpret_cast(first_block) + + (((size_t(ptr) & (innodb_buffer_pool_extent_size - 1)) - first_frame) >> + srv_page_size_shift); +} - reg(); +/** Determine the address of the first invalid block descriptor +@param n_blocks buf_pool.n_blocks +@return offset of the first invalid buf_block_t, relative to buf_pool.memory */ +static size_t block_descriptors_in_bytes(size_t n_blocks) noexcept +{ + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + const size_t extent_size= pages_in_extent[ssize]; + return n_blocks / extent_size * innodb_buffer_pool_extent_size + + (n_blocks % extent_size) * sizeof(buf_block_t); +} - return true; +buf_block_t *buf_pool_t::get_nth_page(size_t pos) const noexcept +{ + mysql_mutex_assert_owner(&mutex); + ut_ad(pos < n_blocks); + return reinterpret_cast + (memory + block_descriptors_in_bytes(pos)); } -#ifdef UNIV_DEBUG -/** Check that all file pages in the buffer chunk are in a replaceable state. 
-@return address of a non-free block -@retval nullptr if all freed */ -inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const noexcept +buf_block_t *buf_pool_t::allocate() noexcept { - buf_block_t *block= blocks; - for (auto i= size; i--; block++) + mysql_mutex_assert_owner(&mutex); + + while (buf_page_t *b= UT_LIST_GET_FIRST(free)) { - if (block->page.in_file()) + ut_ad(b->in_free_list); + ut_d(b->in_free_list = FALSE); + ut_ad(!b->oldest_modification()); + ut_ad(!b->in_LRU_list); + ut_a(!b->in_file()); + UT_LIST_REMOVE(free, b); + + if (UNIV_LIKELY(!n_blocks_to_withdraw) || !withdraw(*b)) { - /* The uncompressed buffer pool should never - contain ROW_FORMAT=COMPRESSED block descriptors. */ - ut_ad(block->page.frame); - const lsn_t lsn= block->page.oldest_modification(); - - if (srv_read_only_mode) - { - /* The page cleaner is disabled in read-only mode. No pages - can be dirtied, so all of them must be clean. */ - ut_ad(lsn == 0 || lsn == recv_sys.recovered_lsn || - srv_force_recovery == SRV_FORCE_NO_LOG_REDO); - break; - } - - if (fsp_is_system_temporary(block->page.id().space())) - { - ut_ad(lsn == 0 || lsn == 2); - break; - } - - if (lsn > 1 || !block->page.can_relocate()) - return block; - - break; + /* No adaptive hash index entries may point to a free block. */ + assert_block_ahi_empty(reinterpret_cast(b)); + b->set_state(buf_page_t::MEMORY); + b->set_os_used(); + return reinterpret_cast(b); } } return nullptr; } -#endif /* UNIV_DEBUG */ /** Create the hash table. 
@param n the lower bound of n_cells */ @@ -1023,92 +976,189 @@ void buf_pool_t::page_hash_table::create(ulint n) noexcept array= static_cast(v); } +size_t buf_pool_t::get_n_blocks(size_t size_in_bytes) noexcept +{ + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + size_t n_blocks_alloc= size_in_bytes / innodb_buffer_pool_extent_size * + pages_in_extent[ssize]; + + if (const size_t incomplete_extent_pages= + (size_in_bytes & (innodb_buffer_pool_extent_size - 1)) >> + srv_page_size_shift) + { + ssize_t d= incomplete_extent_pages - first_page_in_extent[ssize]; + ut_ad(d > 0); + n_blocks_alloc+= d; + } + + return n_blocks_alloc; +} + +size_t buf_pool_t::blocks_in_bytes(size_t n_blocks) noexcept +{ + const size_t shift{srv_page_size_shift}; + const size_t ssize{shift - UNIV_PAGE_SIZE_SHIFT_MIN}; + const size_t extent_size= pages_in_extent[ssize]; + size_t size_in_bytes= n_blocks / extent_size * + innodb_buffer_pool_extent_size; + if (size_t remainder= n_blocks % extent_size) + size_in_bytes+= (remainder + first_page_in_extent[ssize]) << shift; + ut_ad(get_n_blocks(size_in_bytes) == n_blocks); + return size_in_bytes; +} + /** Create the buffer pool. 
@return whether the creation failed */ -bool buf_pool_t::create() +bool buf_pool_t::create() noexcept { ut_ad(this == &buf_pool); - ut_ad(srv_buf_pool_size % srv_buf_pool_chunk_unit == 0); ut_ad(!is_initialised()); - ut_ad(srv_buf_pool_size > 0); - ut_ad(!resizing); - ut_ad(!chunks_old); + ut_ad(size_in_bytes_requested > 0); + ut_ad(!(size_in_bytes_max & (innodb_buffer_pool_extent_size - 1))); + ut_ad(!(size_in_bytes_requested & ((1U << 20) - 1))); + ut_ad(size_in_bytes_requested <= size_in_bytes_max); /* mariabackup loads tablespaces, and it requires field_ref_zero to be allocated before innodb initialization */ ut_ad(srv_operation >= SRV_OPERATION_RESTORE || !field_ref_zero); - NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - - if (!field_ref_zero) { + if (!field_ref_zero) + { if (auto b= aligned_malloc(UNIV_PAGE_SIZE_MAX, 4096)) + { field_ref_zero= static_cast (memset_aligned<4096>(b, 0, UNIV_PAGE_SIZE_MAX)); - else - return true; + goto init; + } + + oom: + ut_ad(!is_initialised()); + sql_print_error("InnoDB: Cannot map innodb_buffer_pool_size_max=%zum", + size_in_bytes_max >> 20); + return true; } - chunk_t::map_reg= UT_NEW_NOKEY(chunk_t::map()); + init: + DBUG_EXECUTE_IF("ib_buf_chunk_init_fails", goto oom;); + size_t size= size_in_bytes_max; + sql_print_information("InnoDB: innodb_buffer_pool_size_max=%zum," + " innodb_buffer_pool_size=%zum", + size >> 20, size_in_bytes_requested >> 20); - new(&allocator) ut_allocator(mem_key_buf_buf_pool); + retry: + { + NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; +#ifdef _WIN32 + memory_unaligned= my_virtual_mem_reserve(&size); +#else + memory_unaligned= my_large_virtual_alloc(&size); +#endif + } - n_chunks= srv_buf_pool_size / srv_buf_pool_chunk_unit; - const size_t chunk_size= srv_buf_pool_chunk_unit; + if (!memory_unaligned) + goto oom; - chunks= static_cast(ut_zalloc_nokey(n_chunks * sizeof *chunks)); - UT_LIST_INIT(free, &buf_page_t::list); - curr_size= 0; - auto chunk= chunks; + const size_t alignment_waste= + 
((~size_t(memory_unaligned) & (innodb_buffer_pool_extent_size - 1)) + 1) & + (innodb_buffer_pool_extent_size - 1); - do + if (size < size_in_bytes_max + alignment_waste) { - if (!chunk->create(chunk_size)) - { - while (--chunk >= chunks) - { - buf_block_t* block= chunk->blocks; + my_virtual_mem_release(memory_unaligned, size); + size+= 1 + + (~size_t(memory_unaligned) & (innodb_buffer_pool_extent_size - 1)); + goto retry; + } - for (auto i= chunk->size; i--; block++) - block->page.lock.free(); + MEM_UNDEFINED(memory_unaligned, size); + memory= memory_unaligned + alignment_waste; + size_unaligned= size; + size-= alignment_waste; + size&= ~(innodb_buffer_pool_extent_size - 1); - allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); - } - ut_free(chunks); - chunks= nullptr; - UT_DELETE(chunk_t::map_reg); - chunk_t::map_reg= nullptr; - aligned_free(const_cast(field_ref_zero)); - field_ref_zero= nullptr; - ut_ad(!is_initialised()); - return true; - } + const size_t actual_size= size_in_bytes_requested; + ut_ad(actual_size <= size); + + size_in_bytes= actual_size; + os_total_large_mem_allocated+= actual_size; - curr_size+= chunk->size; +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, actual_size, &owner); +#endif +#ifndef _AIX + if (!my_virtual_mem_commit(memory, actual_size)) + { + my_virtual_mem_release(memory_unaligned, size_unaligned); + memory= nullptr; + memory_unaligned= nullptr; + goto oom; } - while (++chunk < chunks + n_chunks); + ut_dontdump(memory_unaligned, size_unaligned, true); +#else + update_malloc_size(actual_size, 0); +#endif - ut_ad(is_initialised()); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) + { + struct bitmask *numa_mems_allowed= numa_get_mems_allowed(); + MEM_MAKE_DEFINED(numa_mems_allowed, sizeof *numa_mems_allowed); + if (mbind(memory_unaligned, size_unaligned, MPOL_INTERLEAVE, + numa_mems_allowed->maskp, numa_mems_allowed->size, + MPOL_MF_MOVE)) + sql_print_warning("InnoDB: Failed to set NUMA memory 
policy of" + " buffer pool page frames to MPOL_INTERLEAVE" + " (error: %s).", strerror(errno)); + numa_bitmask_free(numa_mems_allowed); + } +#endif /* HAVE_LIBNUMA */ + + n_blocks= get_n_blocks(actual_size); + n_blocks_to_withdraw= 0; + UT_LIST_INIT(free, &buf_page_t::list); + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + { + buf_block_t *block= reinterpret_cast(extent); + const buf_block_t *extent_end= block + pages_in_extent[ssize]; + if (reinterpret_cast(extent_end) > end) + extent_end= reinterpret_cast(end); + MEM_MAKE_DEFINED(block, (extent_end - block) * sizeof *block); + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize]; + block < extent_end; block++, frame+= srv_page_size) + { + ut_ad(!memcmp(block, field_ref_zero, sizeof *block)); + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + } + +#if defined(__aarch64__) mysql_mutex_init(buf_pool_mutex_key, &mutex, MY_MUTEX_INIT_FAST); +#else + mysql_mutex_init(buf_pool_mutex_key, &mutex, nullptr); +#endif + UT_LIST_INIT(withdrawn, &buf_page_t::list); UT_LIST_INIT(LRU, &buf_page_t::LRU); - UT_LIST_INIT(withdraw, &buf_page_t::list); - withdraw_target= 0; UT_LIST_INIT(flush_list, &buf_page_t::list); UT_LIST_INIT(unzip_LRU, &buf_block_t::unzip_LRU); for (size_t i= 0; i < UT_ARR_SIZE(zip_free); ++i) UT_LIST_INIT(zip_free[i], &buf_buddy_free_t::list); - ulint s= curr_size; + ulint s= n_blocks; s/= BUF_READ_AHEAD_PORTION; read_ahead_area= s >= READ_AHEAD_PAGES ? 
READ_AHEAD_PAGES : my_round_up_to_next_power(static_cast(s)); - curr_pool_size= srv_buf_pool_size; - - n_chunks_new= n_chunks; - page_hash.create(2 * curr_size); - zip_hash.create(2 * curr_size); - last_printout_time= time(NULL); + page_hash.create(2 * n_blocks); + last_printout_time= time(nullptr); mysql_mutex_init(flush_list_mutex_key, &flush_list_mutex, MY_MUTEX_INIT_FAST); @@ -1127,17 +1177,13 @@ bool buf_pool_t::create() io_buf.create((srv_n_read_io_threads + srv_n_write_io_threads) * OS_AIO_N_PENDING_IOS_PER_THREAD); - /* FIXME: remove some of these variables */ - srv_buf_pool_curr_size= curr_pool_size; - srv_buf_pool_old_size= srv_buf_pool_size; - srv_buf_pool_base_size= srv_buf_pool_size; - last_activity_count= srv_get_activity_count(); - chunk_t::map_ref= chunk_t::map_reg; buf_LRU_old_ratio_update(100 * 3 / 8, false); btr_search_sys_create(); + ut_ad(is_initialised()); + sql_print_information("InnoDB: Completed initialization of buffer pool"); return false; } @@ -1172,14 +1218,31 @@ void buf_pool_t::close() noexcept } } - for (auto chunk= chunks + n_chunks; --chunk >= chunks; ) { - buf_block_t *block= chunk->blocks; - - for (auto i= chunk->size; i--; block++) - block->page.lock.free(); + const size_t size{size_in_bytes}; + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + MEM_MAKE_DEFINED(&block->page.lock, sizeof &block->page.lock); + block->page.lock.free(); + } - allocator.deallocate_large_dodump(chunk->mem, &chunk->mem_pfx); + ut_dodump(memory_unaligned, size_unaligned); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_free)(mem_key_buf_buf_pool, size, owner); + owner= nullptr; +#endif + os_total_large_mem_allocated-= size; + 
my_virtual_mem_decommit(memory, size); + my_virtual_mem_release(memory_unaligned, size_unaligned); + memory= nullptr; + memory_unaligned= nullptr; } pthread_cond_destroy(&done_flush_LRU); @@ -1187,137 +1250,13 @@ void buf_pool_t::close() noexcept pthread_cond_destroy(&do_flush_list); pthread_cond_destroy(&done_free); - ut_free(chunks); - chunks= nullptr; page_hash.free(); - zip_hash.free(); io_buf.close(); - UT_DELETE(chunk_t::map_reg); - chunk_t::map_reg= chunk_t::map_ref= nullptr; aligned_free(const_cast(field_ref_zero)); field_ref_zero= nullptr; } -/** Try to reallocate a control block. -@param block control block to reallocate -@return whether the reallocation succeeded */ -inline bool buf_pool_t::realloc(buf_block_t *block) noexcept -{ - buf_block_t* new_block; - - mysql_mutex_assert_owner(&mutex); - ut_ad(block->page.in_file()); - ut_ad(block->page.frame); - - new_block = buf_LRU_get_free_only(); - - if (new_block == NULL) { - mysql_mutex_lock(&buf_pool.flush_list_mutex); - page_cleaner_wakeup(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - return(false); /* free list was not enough */ - } - - const page_id_t id{block->page.id()}; - hash_chain& chain = page_hash.cell_get(id.fold()); - page_hash_latch& hash_lock = page_hash.lock_get(chain); - /* It does not make sense to use transactional_lock_guard - here, because copying innodb_page_size (4096 to 65536) bytes - as well as other changes would likely make the memory - transaction too large. 
*/ - hash_lock.lock(); - - if (block->page.can_relocate()) { - memcpy_aligned( - new_block->page.frame, block->page.frame, - srv_page_size); - mysql_mutex_lock(&buf_pool.flush_list_mutex); - const auto frame = new_block->page.frame; - new_block->page.lock.free(); - new (&new_block->page) buf_page_t(block->page); - new_block->page.frame = frame; - - /* relocate LRU list */ - if (buf_page_t* prev_b = buf_pool.LRU_remove(&block->page)) { - UT_LIST_INSERT_AFTER(LRU, prev_b, &new_block->page); - } else { - UT_LIST_ADD_FIRST(LRU, &new_block->page); - } - - if (LRU_old == &block->page) { - LRU_old = &new_block->page; - } - - ut_ad(new_block->page.in_LRU_list); - - /* relocate unzip_LRU list */ - if (block->page.zip.data != NULL) { - ut_ad(block->in_unzip_LRU_list); - ut_d(new_block->in_unzip_LRU_list = true); - - buf_block_t* prev_block = UT_LIST_GET_PREV(unzip_LRU, block); - UT_LIST_REMOVE(unzip_LRU, block); - - ut_d(block->in_unzip_LRU_list = false); - block->page.zip.data = NULL; - page_zip_set_size(&block->page.zip, 0); - - if (prev_block != NULL) { - UT_LIST_INSERT_AFTER(unzip_LRU, prev_block, new_block); - } else { - UT_LIST_ADD_FIRST(unzip_LRU, new_block); - } - } else { - ut_ad(!block->in_unzip_LRU_list); - ut_d(new_block->in_unzip_LRU_list = false); - } - - /* relocate page_hash */ - hash_chain& chain = page_hash.cell_get(id.fold()); - ut_ad(&block->page == page_hash.get(id, chain)); - buf_pool.page_hash.replace(chain, &block->page, - &new_block->page); - buf_block_modify_clock_inc(block); - static_assert(FIL_PAGE_OFFSET % 4 == 0, "alignment"); - memset_aligned<4>(block->page.frame - + FIL_PAGE_OFFSET, 0xff, 4); - static_assert(FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID % 4 == 2, - "not perfect alignment"); - memset_aligned<2>(block->page.frame - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4); - MEM_UNDEFINED(block->page.frame, srv_page_size); - block->page.set_state(buf_page_t::REMOVE_HASH); - if (!fsp_is_system_temporary(id.space())) { - 
buf_flush_relocate_on_flush_list(&block->page, - &new_block->page); - } - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - block->page.set_corrupt_id(); - - /* set other flags of buf_block_t */ - -#ifdef BTR_CUR_HASH_ADAPT - /* This code should only be executed by resize(), - while the adaptive hash index is disabled. */ - assert_block_ahi_empty(block); - assert_block_ahi_empty_on_init(new_block); - ut_ad(!block->index); - new_block->index = NULL; - new_block->n_hash_helps = 0; - new_block->n_fields = 1; - new_block->left_side = TRUE; -#endif /* BTR_CUR_HASH_ADAPT */ - ut_d(block->page.set_state(buf_page_t::MEMORY)); - /* free block */ - new_block = block; - } - - hash_lock.unlock(); - buf_LRU_block_free_non_file_page(new_block); - return(true); /* free_list was enough */ -} - void buf_pool_t::io_buf_t::create(ulint n_slots) noexcept { this->n_slots= n_slots; @@ -1356,619 +1295,520 @@ buf_tmp_buffer_t *buf_pool_t::io_buf_t::reserve(bool wait_for_reads) noexcept } } -/** Sets the global variable that feeds MySQL's innodb_buffer_pool_resize_status -to the specified string. The format and the following parameters are the -same as the ones used for printf(3). -@param[in] fmt format -@param[in] ... extra parameters according to fmt */ -static -void -buf_resize_status( - const char* fmt, - ...) +ATTRIBUTE_COLD bool buf_pool_t::withdraw(buf_page_t &bpage) noexcept { - va_list ap; - - va_start(ap, fmt); - - vsnprintf( - export_vars.innodb_buffer_pool_resize_status, - sizeof(export_vars.innodb_buffer_pool_resize_status), - fmt, ap); - - va_end(ap); - - ib::info() << export_vars.innodb_buffer_pool_resize_status; + mysql_mutex_assert_owner(&mutex); + ut_ad(n_blocks_to_withdraw); + ut_ad(first_to_withdraw); + ut_ad(!bpage.zip.data); + if (&bpage < first_to_withdraw) + return false; + n_blocks_to_withdraw--; + bpage.lock.free(); + UT_LIST_ADD_LAST(withdrawn, &bpage); + return true; } -/** Withdraw blocks from the buffer pool until meeting withdraw_target. 
-@return whether retry is needed */ -inline bool buf_pool_t::withdraw_blocks() noexcept +ATTRIBUTE_COLD buf_pool_t::shrink_status buf_pool_t::shrink(size_t size) + noexcept { - buf_block_t* block; - ulint loop_count = 0; - - ib::info() << "start to withdraw the last " - << withdraw_target << " blocks"; - - while (UT_LIST_GET_LEN(withdraw) < withdraw_target) { - - /* try to withdraw from free_list */ - ulint count1 = 0; - - mysql_mutex_lock(&mutex); - buf_buddy_condense_free(); - block = reinterpret_cast( - UT_LIST_GET_FIRST(free)); - while (block != NULL - && UT_LIST_GET_LEN(withdraw) < withdraw_target) { - ut_ad(block->page.in_free_list); - ut_ad(!block->page.oldest_modification()); - ut_ad(!block->page.in_LRU_list); - ut_a(!block->page.in_file()); - - buf_block_t* next_block; - next_block = reinterpret_cast( - UT_LIST_GET_NEXT( - list, &block->page)); - - if (will_be_withdrawn(block->page)) { - /* This should be withdrawn */ - UT_LIST_REMOVE(free, &block->page); - UT_LIST_ADD_LAST(withdraw, &block->page); - ut_d(block->in_withdraw_list = true); - count1++; - } - - block = next_block; - } - - /* reserve free_list length */ - if (UT_LIST_GET_LEN(withdraw) < withdraw_target) { - try_LRU_scan = false; - mysql_mutex_unlock(&mutex); - mysql_mutex_lock(&flush_list_mutex); - page_cleaner_wakeup(true); - my_cond_wait(&done_flush_list, - &flush_list_mutex.m_mutex); - mysql_mutex_unlock(&flush_list_mutex); - mysql_mutex_lock(&mutex); - } - - /* relocate blocks/buddies in withdrawn area */ - ulint count2 = 0; - - buf_pool_mutex_exit_forbid(); - for (buf_page_t* bpage = UT_LIST_GET_FIRST(LRU), *next_bpage; - bpage; bpage = next_bpage) { - ut_ad(bpage->in_file()); - next_bpage = UT_LIST_GET_NEXT(LRU, bpage); - if (UNIV_LIKELY_NULL(bpage->zip.data) - && will_be_withdrawn(bpage->zip.data) - && bpage->can_relocate()) { - if (!buf_buddy_realloc( - bpage->zip.data, - page_zip_get_size(&bpage->zip))) { - /* failed to allocate block */ - break; - } - count2++; - if (bpage->frame) { - 
goto realloc_frame; - } - } - - if (bpage->frame && will_be_withdrawn(*bpage) - && bpage->can_relocate()) { -realloc_frame: - if (!realloc(reinterpret_cast( - bpage))) { - /* failed to allocate block */ - break; - } - count2++; - } - } - buf_pool_mutex_exit_allow(); - mysql_mutex_unlock(&mutex); - - buf_resize_status( - "withdrawing blocks. (" ULINTPF "/" ULINTPF ")", - UT_LIST_GET_LEN(withdraw), - withdraw_target); - - ib::info() << "withdrew " - << count1 << " blocks from free list." - << " Tried to relocate " << count2 << " pages (" - << UT_LIST_GET_LEN(withdraw) << "/" - << withdraw_target << ")"; - - if (++loop_count >= 10) { - /* give up for now. - retried after user threads paused. */ - - ib::info() << "will retry to withdraw later"; - - /* need retry later */ - return(true); - } - } + mysql_mutex_assert_owner(&mutex); + DBUG_EXECUTE_IF("buf_shrink_fail", return SHRINK_ABORT;); + buf_load_abort(); - /* confirm withdrawn enough */ - for (const chunk_t* chunk = chunks + n_chunks_new, - * const echunk = chunks + n_chunks; chunk != echunk; chunk++) { - block = chunk->blocks; - for (ulint j = chunk->size; j--; block++) { - ut_a(block->page.state() == buf_page_t::NOT_USED); - ut_ad(block->in_withdraw_list); - } - } - - ib::info() << "withdrawn target: " << UT_LIST_GET_LEN(withdraw) - << " blocks"; - - return(false); -} - - - -inline void buf_pool_t::page_hash_table::write_lock_all() noexcept -{ - for (auto n= pad(n_cells) & ~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) + if (!n_blocks_to_withdraw) { - reinterpret_cast(array[n]).lock(); - if (!n) - break; + withdraw_done: + first_to_withdraw= nullptr; + while (buf_page_t *b= UT_LIST_GET_FIRST(withdrawn)) + { + UT_LIST_REMOVE(withdrawn, b); + /* satisfy the check in lazy_allocate() */ + ut_d(memset((void*) b, 0, sizeof(buf_block_t))); + } + return SHRINK_DONE; } -} + buf_buddy_condense_free(size); -inline void buf_pool_t::page_hash_table::write_unlock_all() noexcept -{ - for (auto n= pad(n_cells) & 
~ELEMENTS_PER_LATCH;; n-= ELEMENTS_PER_LATCH + 1) + for (buf_page_t *b= UT_LIST_GET_FIRST(free), *next; b; b= next) { - reinterpret_cast(array[n]).unlock(); - if (!n) - break; - } -} - - -namespace -{ + ut_ad(b->in_free_list); + ut_ad(!b->in_LRU_list); + ut_ad(!b->zip.data); + ut_ad(!b->oldest_modification()); + ut_a(b->state() == buf_page_t::NOT_USED); -struct find_interesting_trx -{ - void operator()(const trx_t &trx) - { - if (!trx.is_started()) - return; - if (trx.mysql_thd == nullptr) - return; - if (withdraw_started <= trx.start_time_micro) - return; + next= UT_LIST_GET_NEXT(list, b); - if (!found) + if (b >= first_to_withdraw) { - sql_print_warning("InnoDB: The following trx might hold " - "the blocks in buffer pool to " - "be withdrawn. Buffer pool " - "resizing can complete only " - "after all the transactions " - "below release the blocks."); - found= true; + UT_LIST_REMOVE(free, b); + b->lock.free(); + UT_LIST_ADD_LAST(withdrawn, b); + if (!--n_blocks_to_withdraw) + goto withdraw_done; } - - lock_trx_print_wait_and_mvcc_state(stderr, &trx, current_time); } - bool &found; - /** microsecond_interval_timer() */ - const ulonglong withdraw_started; - const my_hrtime_t current_time; -}; - -} // namespace - -/** Resize from srv_buf_pool_old_size to srv_buf_pool_size. 
*/ -inline void buf_pool_t::resize() -{ - ut_ad(this == &buf_pool); - - bool warning = false; - - NUMA_MEMPOLICY_INTERLEAVE_IN_SCOPE; - - ut_ad(!resize_in_progress()); - ut_ad(srv_buf_pool_chunk_unit > 0); - - ulint new_instance_size = srv_buf_pool_size >> srv_page_size_shift; - - buf_resize_status("Resizing buffer pool from " ULINTPF " to " - ULINTPF " (unit=" ULINTPF ").", - srv_buf_pool_old_size, srv_buf_pool_size, - srv_buf_pool_chunk_unit); - -#ifdef BTR_CUR_HASH_ADAPT - /* disable AHI if needed */ - buf_resize_status("Disabling adaptive hash index."); - - btr_search_s_lock_all(); - const bool btr_search_disabled = btr_search_enabled; - btr_search_s_unlock_all(); - - btr_search_disable(); - - if (btr_search_disabled) { - ib::info() << "disabled adaptive hash index."; - } -#endif /* BTR_CUR_HASH_ADAPT */ - - mysql_mutex_lock(&mutex); - ut_ad(n_chunks_new == n_chunks); - ut_ad(UT_LIST_GET_LEN(withdraw) == 0); - - n_chunks_new = (new_instance_size << srv_page_size_shift) - / srv_buf_pool_chunk_unit; - curr_size = n_chunks_new * chunks->size; - mysql_mutex_unlock(&mutex); - - if (is_shrinking()) { - /* set withdraw target */ - size_t w = 0; - - for (const chunk_t* chunk = chunks + n_chunks_new, - * const echunk = chunks + n_chunks; - chunk != echunk; chunk++) - w += chunk->size; - - ut_ad(withdraw_target == 0); - withdraw_target = w; - } - - buf_resize_status("Withdrawing blocks to be shrunken."); - - ulonglong withdraw_started = microsecond_interval_timer(); - ulonglong message_interval = 60ULL * 1000 * 1000; - ulint retry_interval = 1; - -withdraw_retry: - /* wait for the number of blocks fit to the new size (if needed)*/ - bool should_retry_withdraw = is_shrinking() - && withdraw_blocks(); - - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - /* abort to resize for shutdown. 
*/ - return; - } - - /* abort buffer pool load */ - buf_load_abort(); - - const ulonglong current_time = microsecond_interval_timer(); + buf_block_t *block= allocate(); + size_t scanned= 0; + for (buf_page_t *b= lru_scan_itr.start(), *prev; block && b; b= prev) + { + ut_ad(b->in_LRU_list); + ut_a(b->in_file()); - if (should_retry_withdraw - && current_time - withdraw_started >= message_interval) { + prev= UT_LIST_GET_PREV(LRU, b); - if (message_interval > 900000000) { - message_interval = 1800000000; - } else { - message_interval *= 2; - } - - bool found= false; - find_interesting_trx f - {found, withdraw_started, my_hrtime_coarse()}; - withdraw_started = current_time; + if (!b->can_relocate()) + { + next: + if (++scanned & 31) + continue; + /* Avoid starvation by periodically releasing buf_pool.mutex. */ + lru_scan_itr.set(prev); + mysql_mutex_unlock(&mutex); + mysql_mutex_lock(&mutex); + prev= lru_scan_itr.get(); + continue; + } - /* This is going to exceed the maximum size of a - memory transaction. 
*/ - LockMutexGuard g{SRW_LOCK_CALL}; - trx_sys.trx_list.for_each(f); - } + const page_id_t id{b->id()}; + hash_chain &chain= page_hash.cell_get(id.fold()); + page_hash_latch &hash_lock= page_hash.lock_get(chain); + hash_lock.lock(); - if (should_retry_withdraw) { - ib::info() << "Will retry to withdraw " << retry_interval - << " seconds later."; - std::this_thread::sleep_for( - std::chrono::seconds(retry_interval)); + { + /* relocate flush_list and b->page.zip */ + bool have_flush_list_mutex= false; + + switch (b->oldest_modification()) { + case 2: + ut_ad(fsp_is_system_temporary(id.space())); + /* fall through */ + case 0: + break; + default: + mysql_mutex_lock(&flush_list_mutex); + switch (ut_d(lsn_t om=) b->oldest_modification()) { + case 1: + delete_from_flush_list(b); + /* fall through */ + case 0: + mysql_mutex_unlock(&flush_list_mutex); + break; + default: + ut_ad(om != 2); + have_flush_list_mutex= true; + } + } - if (retry_interval > 5) { - retry_interval = 10; - } else { - retry_interval *= 2; - } + if (!b->can_relocate()) + { + next_quick: + if (have_flush_list_mutex) + mysql_mutex_unlock(&flush_list_mutex); + hash_lock.unlock(); + continue; + } - goto withdraw_retry; - } + if (UNIV_UNLIKELY(will_be_withdrawn(b->zip.data, size))) + { + block= buf_buddy_shrink(b, block); + ut_ad(mach_read_from_4(b->zip.data + FIL_PAGE_OFFSET) == id.page_no()); + if (UNIV_UNLIKELY(!n_blocks_to_withdraw)) + { + if (have_flush_list_mutex) + mysql_mutex_unlock(&flush_list_mutex); + hash_lock.unlock(); + if (block) + buf_LRU_block_free_non_file_page(block); + goto withdraw_done; + } + if (!block && !(block= allocate())) + goto next_quick; + } - buf_resize_status("Latching whole of buffer pool."); + if (!b->frame || b < first_to_withdraw) + goto next_quick; -#ifndef DBUG_OFF - { - bool should_wait = true; + ut_ad(is_uncompressed_current(b)); - while (should_wait) { - should_wait = false; - DBUG_EXECUTE_IF( - "ib_buf_pool_resize_wait_before_resize", - should_wait = true; - 
std::this_thread::sleep_for( - std::chrono::milliseconds(10));); - } - } -#endif /* !DBUG_OFF */ + byte *const frame= block->page.frame; + memcpy_aligned<4096>(frame, b->frame, srv_page_size); + b->lock.free(); + block->page.lock.free(); + new(&block->page) buf_page_t(*b); + block->page.frame= frame; - if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { - return; - } + if (have_flush_list_mutex) + { + buf_flush_relocate_on_flush_list(b, &block->page); + mysql_mutex_unlock(&flush_list_mutex); + } + else + { + ut_d(if (auto om= b->oldest_modification()) ut_ad(om == 2)); + b->oldest_modification_.store(0, std::memory_order_relaxed); + } + } - /* Indicate critical path */ - resizing.store(true, std::memory_order_relaxed); + /* relocate LRU list */ + if (buf_page_t *prev_b= LRU_remove(b)) + UT_LIST_INSERT_AFTER(LRU, prev_b, &block->page); + else + UT_LIST_ADD_FIRST(LRU, &block->page); - mysql_mutex_lock(&mutex); - page_hash.write_lock_all(); + if (LRU_old == b) + LRU_old= &block->page; - chunk_t::map_reg = UT_NEW_NOKEY(chunk_t::map()); + ut_ad(block->page.in_LRU_list); - /* add/delete chunks */ + /* relocate page_hash */ + ut_ad(b == page_hash.get(id, chain)); + page_hash.replace(chain, b, &block->page); + b->id_.set_corrupted(); - buf_resize_status("buffer pool resizing with chunks " - ULINTPF " to " ULINTPF ".", - n_chunks, n_chunks_new); + if (b->zip.data) + { + ut_ad(mach_read_from_4(b->zip.data + FIL_PAGE_OFFSET) == id.page_no()); + b->zip.data= nullptr; + /* relocate unzip_LRU list */ + buf_block_t *old_block= reinterpret_cast(b); + ut_ad(old_block->in_unzip_LRU_list); + ut_d(old_block->in_unzip_LRU_list= false); + ut_d(block->in_unzip_LRU_list= true); + + buf_block_t *prev= UT_LIST_GET_PREV(unzip_LRU, old_block); + UT_LIST_REMOVE(unzip_LRU, old_block); + + if (prev) + UT_LIST_INSERT_AFTER(unzip_LRU, prev, block); + else + UT_LIST_ADD_FIRST(unzip_LRU, block); + } - if (is_shrinking()) { - /* delete chunks */ - chunk_t* chunk = chunks + n_chunks_new; - const chunk_t* 
const echunk = chunks + n_chunks; + buf_block_modify_clock_inc(block); - ulint sum_freed = 0; +#ifdef BTR_CUR_HASH_ADAPT + assert_block_ahi_empty_on_init(block); + block->index= nullptr; + block->n_hash_helps= 0; + block->n_fields= 1; + block->left_side= true; +#endif /* BTR_CUR_HASH_ADAPT */ + hash_lock.unlock(); - while (chunk < echunk) { - /* buf_LRU_block_free_non_file_page() invokes - MEM_NOACCESS() on any buf_pool.free blocks. - We must cancel the effect of that. In - MemorySanitizer, MEM_NOACCESS() is no-op, so - we must not do anything special for it here. */ -#ifdef HAVE_valgrind -# if !__has_feature(memory_sanitizer) - MEM_MAKE_DEFINED(chunk->mem, chunk->mem_size()); -# endif -#else - MEM_MAKE_ADDRESSABLE(chunk->mem, chunk->size); -#endif + ut_d(b->in_LRU_list= false); - buf_block_t* block = chunk->blocks; + b->set_state(buf_page_t::NOT_USED); + UT_LIST_ADD_LAST(withdrawn, b); + if (!--n_blocks_to_withdraw) + goto withdraw_done; - for (ulint j = chunk->size; j--; block++) { - block->page.lock.free(); - } + block= allocate(); + goto next; + } - allocator.deallocate_large_dodump( - chunk->mem, &chunk->mem_pfx); - sum_freed += chunk->size; - ++chunk; - } + if (block) + buf_LRU_block_free_non_file_page(block); - /* discard withdraw list */ - UT_LIST_INIT(withdraw, &buf_page_t::list); - withdraw_target = 0; + if (!UT_LIST_GET_LEN(LRU) && n_blocks_to_withdraw) + return SHRINK_ABORT; - ib::info() << n_chunks - n_chunks_new - << " chunks (" << sum_freed - << " blocks) were freed."; + if (UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < usable_size() / 20) + return SHRINK_ABORT; - n_chunks = n_chunks_new; - } + mysql_mutex_lock(&flush_list_mutex); - { - /* reallocate chunks */ - const size_t new_chunks_size - = n_chunks_new * sizeof(chunk_t); - - chunk_t* new_chunks = static_cast( - ut_zalloc_nokey_nofatal(new_chunks_size)); - - DBUG_EXECUTE_IF("buf_pool_resize_chunk_null", - ut_free(new_chunks); new_chunks= nullptr; ); - - if (!new_chunks) { - ib::error() << 
"failed to allocate" - " the chunk array."; - n_chunks_new = n_chunks; - warning = true; - chunks_old = NULL; - goto calc_buf_pool_size; - } + if (LRU_warned && !UT_LIST_GET_FIRST(free)) + { + LRU_warned_clear(); + mysql_mutex_unlock(&flush_list_mutex); + return SHRINK_ABORT; + } - ulint n_chunks_copy = ut_min(n_chunks_new, n_chunks); + try_LRU_scan= false; + mysql_mutex_unlock(&mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); + mysql_mutex_lock(&mutex); - memcpy(new_chunks, chunks, - n_chunks_copy * sizeof *new_chunks); + if (!n_blocks_to_withdraw) + goto withdraw_done; - for (ulint j = 0; j < n_chunks_copy; j++) { - new_chunks[j].reg(); - } + return SHRINK_IN_PROGRESS; +} - chunks_old = chunks; - chunks = new_chunks; - } +inline void buf_pool_t::shrunk(size_t size, size_t reduced) noexcept +{ + ut_ad(size + reduced == size_in_bytes); + size_in_bytes_requested= size; + size_in_bytes= size; + for (size_t n= page_hash.pad(page_hash.n_cells), i= 0; i < n; + i+= page_hash.ELEMENTS_PER_LATCH + 1) + { + auto &latch= reinterpret_cast(page_hash.array[i]); + latch.lock(); + /* We already shrunk size_in_bytes. The exclusive lock here + ensures that any page_guess() will detect an out-of-bounds + guess before we invoke my_virtual_mem_decommit() below. 
*/ + latch.unlock(); + } + my_virtual_mem_decommit(memory + size, reduced); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_free)(mem_key_buf_buf_pool, reduced, owner); +#endif +} - if (n_chunks_new > n_chunks) { - /* add chunks */ - ulint sum_added = 0; - ulint n = n_chunks; - const size_t unit = srv_buf_pool_chunk_unit; - - for (chunk_t* chunk = chunks + n_chunks, - * const echunk = chunks + n_chunks_new; - chunk != echunk; chunk++) { - if (!chunk->create(unit)) { - ib::error() << "failed to allocate" - " memory for buffer pool chunk"; - - warning = true; - n_chunks_new = n_chunks; - break; - } +ATTRIBUTE_COLD void buf_pool_t::resize(size_t size, THD *thd) noexcept +{ + ut_ad(this == &buf_pool); + mysql_mutex_assert_owner(&LOCK_global_system_variables); + ut_ad(size <= size_in_bytes_max); + if (my_use_large_pages) + { + my_error(ER_VARIABLE_IS_READONLY, MYF(0), "InnoDB", + "innodb_buffer_pool_size", "large_pages=0"); + return; + } - sum_added += chunk->size; - ++n; - } + size_t n_blocks_new= get_n_blocks(size); - ib::info() << n_chunks_new - n_chunks - << " chunks (" << sum_added - << " blocks) were added."; + mysql_mutex_lock(&mutex); - n_chunks = n; - } -calc_buf_pool_size: - /* recalc curr_size */ - ulint new_size = 0; + const size_t old_size= size_in_bytes; + if (first_to_withdraw || old_size != size_in_bytes_requested) + { + mysql_mutex_unlock(&mutex); + my_printf_error(ER_WRONG_USAGE, + "innodb_buffer_pool_size change is already in progress", + MYF(0)); + return; + } - { - chunk_t* chunk = chunks; - const chunk_t* const echunk = chunk + n_chunks; - do { - new_size += chunk->size; - } while (++chunk != echunk); - } + ut_ad(UT_LIST_GET_LEN(withdrawn) == 0); + ut_ad(n_blocks_to_withdraw == 0); - curr_size = new_size; - n_chunks_new = n_chunks; + if (size == old_size) + { + mysql_mutex_unlock(&mutex); + return; + } - if (chunks_old) { - ut_free(chunks_old); - chunks_old = NULL; - } +#ifdef BTR_CUR_HASH_ADAPT + bool ahi_disabled= false; +#endif - chunk_t::map* 
chunk_map_old = chunk_t::map_ref; - chunk_t::map_ref = chunk_t::map_reg; + const bool significant_change= + n_blocks_new > n_blocks * 2 || n_blocks > n_blocks_new * 2; + const ssize_t n_blocks_removed= n_blocks - n_blocks_new; - /* set size */ - ut_ad(UT_LIST_GET_LEN(withdraw) == 0); - ulint s= curr_size; - s/= BUF_READ_AHEAD_PORTION; - read_ahead_area= s >= READ_AHEAD_PAGES - ? READ_AHEAD_PAGES - : my_round_up_to_next_power(static_cast(s)); - curr_pool_size= n_chunks * srv_buf_pool_chunk_unit; - srv_buf_pool_curr_size= curr_pool_size;/* FIXME: remove*/ - extern ulonglong innobase_buffer_pool_size; - innobase_buffer_pool_size= buf_pool_size_align(srv_buf_pool_curr_size); + if (n_blocks_removed <= 0) + { + if (!my_virtual_mem_commit(memory + old_size, size - old_size)) + { + mysql_mutex_unlock(&mutex); + sql_print_error("InnoDB: Cannot commit innodb_buffer_pool_size=%zum;" + " retaining innodb_buffer_pool_size=%zum", + size >> 20, old_size >> 20); + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return; + } - const bool new_size_too_diff - = srv_buf_pool_base_size > srv_buf_pool_size * 2 - || srv_buf_pool_base_size * 2 < srv_buf_pool_size; + ut_dontdump(memory + old_size, size - old_size, true); + size_in_bytes_requested= size; + size_in_bytes= size; - mysql_mutex_unlock(&mutex); - page_hash.write_unlock_all(); + { + const size_t ssize= srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN; + const size_t pages= pages_in_extent[ssize]; + const size_t first_extent= n_blocks / pages; - UT_DELETE(chunk_map_old); + char *extent= memory + first_extent * innodb_buffer_pool_extent_size; - resizing.store(false, std::memory_order_relaxed); + buf_block_t *block= reinterpret_cast(extent); + if (const size_t first_blocks= n_blocks % pages) + { + /* Extend the last (partial) extent until its end */ + const buf_block_t *extent_end= block + + (first_extent == (n_blocks_new / pages) + ? 
(n_blocks_new % pages) + : pages); + block+= first_blocks; + memset((void*) block, 0, (extent_end - block) * sizeof *block); + + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize] + + (first_blocks << srv_page_size_shift); block < extent_end; + block++, frame+= srv_page_size) + { + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + extent+= innodb_buffer_pool_extent_size; + } - /* Normalize other components, if the new size is too different */ - if (!warning && new_size_too_diff) { - srv_buf_pool_base_size = srv_buf_pool_size; + /* Fill in further extents; @see buf_pool_t::create() */ + for (const char *const end_new= memory + + block_descriptors_in_bytes(n_blocks_new); + extent < end_new; extent+= innodb_buffer_pool_extent_size) + { + block= reinterpret_cast(extent); + const buf_block_t *extent_end= block + pages; + if (reinterpret_cast(extent_end) > end_new) + extent_end= reinterpret_cast(end_new); + + memset((void*) block, 0, (extent_end - block) * sizeof *block); + for (byte *frame= reinterpret_cast(extent) + + first_frame_in_extent[ssize]; + block < extent_end; block++, frame+= srv_page_size) + { + block->page.frame= frame; + block->page.lock.init(); + UT_LIST_ADD_LAST(free, &block->page); + ut_d(block->page.in_free_list= true); + } + } + } - buf_resize_status("Resizing also other hash tables."); + mysql_mutex_unlock(&LOCK_global_system_variables); + resized: + ut_ad(UT_LIST_GET_LEN(withdrawn) == 0); + ut_ad(n_blocks_to_withdraw == 0); + ut_ad(!first_to_withdraw); + const size_t old_blocks{n_blocks}; + n_blocks= n_blocks_new; - srv_lock_table_size = 5 - * (srv_buf_pool_size >> srv_page_size_shift); - lock_sys.resize(srv_lock_table_size); - dict_sys.resize(); + size_t s= n_blocks_new / BUF_READ_AHEAD_PORTION; + read_ahead_area= s >= READ_AHEAD_PAGES + ? 
READ_AHEAD_PAGES + : my_round_up_to_next_power(uint32(s)); - ib::info() << "Resized hash tables at lock_sys," -#ifdef BTR_CUR_HASH_ADAPT - " adaptive hash index," -#endif /* BTR_CUR_HASH_ADAPT */ - " dictionary."; - } + if (ssize_t d= size - old_size) + { + os_total_large_mem_allocated+= d; + if (d > 0) + { + /* Already committed memory earlier */ + ut_ad(n_blocks_removed <= 0); +#ifdef UNIV_PFS_MEMORY + PSI_MEMORY_CALL(memory_alloc)(mem_key_buf_buf_pool, d, &owner); +#endif + } + else + shrunk(size, size_t(-d)); + } - /* normalize ibuf.max_size */ - ibuf_max_size_update(srv_change_buffer_max_size); + mysql_mutex_unlock(&mutex); - if (srv_buf_pool_old_size != srv_buf_pool_size) { + if (significant_change) + { + sql_print_information("InnoDB: Resizing hash tables"); + srv_lock_table_size= 5 * n_blocks_new; + lock_sys.resize(srv_lock_table_size); + dict_sys.resize(); + } - ib::info() << "Completed to resize buffer pool from " - << srv_buf_pool_old_size - << " to " << srv_buf_pool_size << "."; - srv_buf_pool_old_size = srv_buf_pool_size; - } + ibuf_max_size_update(srv_change_buffer_max_size); +#ifdef BTR_CUR_HASH_ADAPT + if (ahi_disabled) + btr_search_enable(true); +#endif + if (n_blocks_removed) + sql_print_information("InnoDB: innodb_buffer_pool_size=%zum (%zu pages)" + " resized from %zum (%zu pages)", + size >> 20, n_blocks_new, old_size >> 20, + old_blocks); + mysql_mutex_lock(&LOCK_global_system_variables); + } + else + { + size_t to_withdraw= size_t(n_blocks_removed); + n_blocks_to_withdraw= to_withdraw; + first_to_withdraw= &get_nth_page(n_blocks_new)->page; + size_in_bytes_requested= size; + mysql_mutex_unlock(&LOCK_global_system_variables); + mysql_mutex_unlock(&mutex); + DEBUG_SYNC_C("buf_pool_shrink_before_wakeup"); + mysql_mutex_lock(&flush_list_mutex); + page_cleaner_wakeup(true); + my_cond_wait(&done_flush_list, &flush_list_mutex.m_mutex); + mysql_mutex_unlock(&flush_list_mutex); #ifdef BTR_CUR_HASH_ADAPT - /* enable AHI if needed */ - if 
(btr_search_disabled) { - btr_search_enable(true); - ib::info() << "Re-enabled adaptive hash index."; - } + ahi_disabled= btr_search_disable(); #endif /* BTR_CUR_HASH_ADAPT */ + mysql_mutex_lock(&mutex); - char now[32]; - - ut_sprintf_timestamp(now); - if (!warning) { - buf_resize_status("Completed resizing buffer pool at %s.", - now); - } else { - buf_resize_status("Resizing buffer pool failed," - " finished resizing at %s.", now); - } + time_t last_message= 0; - ut_d(validate()); + do + { + time_t now= time(nullptr); + if (now - last_message > 15) + { + if (last_message != 0 && to_withdraw == n_blocks_to_withdraw) + break; + to_withdraw= n_blocks_to_withdraw; + last_message= now; + sql_print_information("InnoDB: Trying to shrink" + " innodb_buffer_pool_size=%zum (%zu pages)" + " from %zum (%zu pages, to withdraw %zu)", + size >> 20, n_blocks_new, + old_size >> 20, n_blocks, to_withdraw); + } + shrink_status s{shrink(size)}; + if (s == SHRINK_DONE) + goto resized; + if (s != SHRINK_IN_PROGRESS) + break; + } + while (!thd_kill_level(thd)); - return; -} + ut_ad(size_in_bytes > size_in_bytes_requested); + n_blocks_to_withdraw= 0; + first_to_withdraw= nullptr; + size_in_bytes_requested= size_in_bytes; -/** Thread pool task invoked by innodb_buffer_pool_size changes. 
*/ -static void buf_resize_callback(void *) -{ - DBUG_ENTER("buf_resize_callback"); - ut_ad(srv_shutdown_state < SRV_SHUTDOWN_CLEANUP); - mysql_mutex_lock(&buf_pool.mutex); - const auto size= srv_buf_pool_size; - const bool work= srv_buf_pool_old_size != size; - mysql_mutex_unlock(&buf_pool.mutex); + while (buf_page_t *b= UT_LIST_GET_FIRST(withdrawn)) + { + ut_ad(!b->oldest_modification()); + ut_ad(b->state() == buf_page_t::NOT_USED); + UT_LIST_REMOVE(withdrawn, b); + UT_LIST_ADD_LAST(free, b); + ut_d(b->in_free_list= true); + b->lock.init(); + } - if (work) - buf_pool.resize(); - else - { - std::ostringstream sout; - sout << "Size did not change: old size = new size = " << size; - buf_resize_status(sout.str().c_str()); + mysql_mutex_unlock(&mutex); + my_printf_error(ER_WRONG_USAGE, "innodb_buffer_pool_size change aborted", + MYF(ME_ERROR_LOG)); +#ifdef BTR_CUR_HASH_ADAPT + if (ahi_disabled) + btr_search_enable(true); +#endif + mysql_mutex_lock(&LOCK_global_system_variables); } - DBUG_VOID_RETURN; -} - -/* Ensure that task does not run in parallel, by setting max_concurrency to 1 for the thread group */ -static tpool::task_group single_threaded_group(1); -static tpool::waitable_task buf_resize_task(buf_resize_callback, - nullptr, &single_threaded_group); - -void buf_resize_start() -{ - srv_thread_pool->submit_task(&buf_resize_task); -} -void buf_resize_shutdown() -{ - buf_resize_task.wait(); + ut_d(validate()); } - /** Relocate a ROW_FORMAT=COMPRESSED block in the LRU list and buf_pool.page_hash. The caller must relocate bpage->list. 
@param bpage ROW_FORMAT=COMPRESSED only block @param dpage destination control block */ -static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) +static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) noexcept { const page_id_t id{bpage->id()}; buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); ut_ad(!bpage->frame); mysql_mutex_assert_owner(&buf_pool.mutex); + ut_ad(mach_read_from_4(bpage->zip.data + FIL_PAGE_OFFSET) == id.page_no()); ut_ad(buf_pool.page_hash.lock_get(chain).is_write_locked()); ut_ad(bpage == buf_pool.page_hash.get(id, chain)); ut_ad(!buf_pool.watch_is_sentinel(*bpage)); @@ -1977,6 +1817,7 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) ut_ad(state <= buf_page_t::READ_FIX); ut_ad(bpage->lock.is_write_locked()); const auto frame= dpage->frame; + ut_ad(frame == reinterpret_cast(dpage)->frame_address()); dpage->lock.free(); new (dpage) buf_page_t(*bpage); @@ -2048,7 +1889,6 @@ buf_page_t *buf_pool_t::watch_set(const page_id_t id, ut_ad(w->access_time == 0); ut_ad(!w->oldest_modification()); ut_ad(!w->zip.data); - ut_ad(!w->in_zip_hash); static_assert(buf_page_t::NOT_USED == 0, "efficiency"); if (ut_d(auto s=) w->state()) { @@ -2329,6 +2169,8 @@ bool buf_zip_decompress(buf_block_t *block, bool check) noexcept ut_ad(block->zip_size()); ut_a(block->page.id().space() != 0); + ut_ad(mach_read_from_4(frame + FIL_PAGE_OFFSET) + == block->page.id().page_no()); if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) { @@ -2567,7 +2409,6 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id, if (b && !watch_is_sentinel(*b)) { uint32_t state= b->fix() + 1; - ut_ad(!b->in_zip_hash); hash_lock.unlock_shared(); if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) @@ -2597,7 +2438,8 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id, return reinterpret_cast(-1); } - if (UNIV_LIKELY(b->frame != nullptr)); + if (UNIV_LIKELY(b->frame != nullptr)) + ut_ad(b->frame==reinterpret_cast(b)->frame_address()); 
else if (state < buf_page_t::READ_FIX) goto unzip; else @@ -2663,6 +2505,29 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id, } } +TRANSACTIONAL_TARGET +uint32_t buf_pool_t::page_guess(buf_block_t *b, page_hash_latch &latch, + const page_id_t id) noexcept +{ + transactional_shared_lock_guard g{latch}; + if (UNIV_UNLIKELY(reinterpret_cast(b) >= memory + size_in_bytes)) + /* shrunk() made the memory inaccessible. */ + return 0; + const page_id_t block_id{b->page.id()}; + + if (id == block_id) + { + uint32_t state= b->page.state(); + /* Ignore guesses that point to read-fixed blocks. We can only + avoid a race condition by looking up the block via page_hash. */ + if ((state >= buf_page_t::FREED && state < buf_page_t::READ_FIX) || + state >= buf_page_t::WRITE_FIX) + return b->page.fix(); + ut_ad(b->page.frame); + } + return 0; +} + /** Low level function used to get access to a database page. @param[in] page_id page id @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 @@ -2727,22 +2592,9 @@ buf_page_get_low( buf_block_t* block = guess; uint32_t state; - if (block) { - transactional_shared_lock_guard g{hash_lock}; - if (buf_pool.is_uncompressed(block) - && page_id == block->page.id()) { - state = block->page.state(); - /* Ignore guesses that point to read-fixed blocks. - We can only avoid a race condition by - looking up the block via buf_pool.page_hash. 
*/ - if ((state >= buf_page_t::FREED - && state < buf_page_t::READ_FIX) - || state >= buf_page_t::WRITE_FIX) { - ut_ad(!block->page.in_zip_hash); - state = block->page.fix(); - goto got_block; - } - } + if (block + && (state = buf_pool.page_guess(block, hash_lock, page_id))) { + goto got_block; } guess = nullptr; @@ -2812,7 +2664,6 @@ buf_page_get_low( goto loop; got_block: - ut_ad(!block->page.in_zip_hash); state++; got_block_fixed: ut_ad(state > buf_page_t::FREED); @@ -3017,6 +2868,7 @@ buf_page_get_low( btr_search_drop_page_hash_index(block, true); #endif /* BTR_CUR_HASH_ADAPT */ + ut_ad(block->page.frame == block->frame_address()); ut_ad(page_id_t(page_get_space_id(block->page.frame), page_get_page_no(block->page.frame)) == page_id); return block; @@ -3122,21 +2974,19 @@ buf_page_get_gen( return block; } -TRANSACTIONAL_TARGET buf_block_t *buf_page_optimistic_fix(buf_block_t *block, page_id_t id) noexcept { buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(id.fold()); - transactional_shared_lock_guard g - {buf_pool.page_hash.lock_get(chain)}; - if (UNIV_UNLIKELY(!buf_pool.is_uncompressed(block) || - id != block->page.id() || !block->page.frame)) - return nullptr; - const auto state= block->page.state(); - if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED || - state >= buf_page_t::READ_FIX)) - return nullptr; - block->page.fix(); - return block; + page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain); + if (uint32_t state= buf_pool.page_guess(block, hash_lock, id)) + { + if (UNIV_LIKELY(state >= buf_page_t::UNFIXED)) + return block; + else + /* Refuse access to pages that are marked as freed in the data file. 
*/ + block->page.unfix(); + } + return nullptr; } buf_block_t *buf_page_optimistic_get(buf_block_t *block, @@ -3338,6 +3188,7 @@ static buf_block_t *buf_page_create_low(page_id_t page_id, ulint zip_size, { mysql_mutex_unlock(&buf_pool.mutex); buf_block_t *block= reinterpret_cast(bpage); + ut_ad(bpage->frame == block->frame_address()); mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); #ifdef BTR_CUR_HASH_ADAPT drop_hash_entry= block->index; @@ -3373,7 +3224,8 @@ static buf_block_t *buf_page_create_low(page_id_t page_id, ulint zip_size, else { mysql_mutex_unlock(&buf_pool.mutex); - ut_ad(bpage->frame); + ut_ad(bpage->frame == + reinterpret_cast(bpage)->frame_address()); #ifdef BTR_CUR_HASH_ADAPT ut_ad(!reinterpret_cast(bpage)->index); #endif @@ -3829,6 +3681,61 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node) noexcept return DB_SUCCESS; } +#ifdef BTR_CUR_HASH_ADAPT +/** Clear the adaptive hash index on all pages in the buffer pool. */ +ATTRIBUTE_COLD void buf_pool_t::clear_hash_index() noexcept +{ + std::set garbage; + + mysql_mutex_lock(&mutex); + ut_ad(!btr_search_enabled); + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + dict_index_t *index= block->index; + assert_block_ahi_valid(block); + + /* We can clear block->index and block->n_pointers when + holding all AHI latches exclusively; see the comments in buf0buf.h */ + + if (!index) + { +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + ut_a(!block->n_pointers); +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + continue; + } + + ut_d(const auto s= block->page.state()); + /* Another thread may have set the state to + REMOVE_HASH in buf_LRU_block_remove_hashed(). 
+ + The state change in buf_pool_t::resize() is not observable + here, because in that case we would have !block->index. + + In the end, the entire adaptive hash index will be removed. */ + ut_ad(s >= buf_page_t::UNFIXED || s == buf_page_t::REMOVE_HASH); +# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + block->n_pointers= 0; +# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + if (index->freed()) + garbage.insert(index); + block->index= nullptr; + } + + mysql_mutex_unlock(&mutex); + + for (dict_index_t *index : garbage) + btr_search_lazy_free(index); +} +#endif /* BTR_CUR_HASH_ADAPT */ + #ifdef UNIV_DEBUG /** Check that all blocks are in a replaceable state. @return address of a non-free block @@ -3836,10 +3743,44 @@ dberr_t buf_page_t::read_complete(const fil_node_t &node) noexcept void buf_pool_t::assert_all_freed() noexcept { mysql_mutex_lock(&mutex); - const chunk_t *chunk= chunks; - for (auto i= n_chunks; i--; chunk++) - if (const buf_block_t* block= chunk->not_freed()) - ib::fatal() << "Page " << block->page.id() << " still fixed or dirty"; + + for (char *extent= memory, + *end= memory + block_descriptors_in_bytes(n_blocks); + extent < end; extent+= innodb_buffer_pool_extent_size) + for (buf_block_t *block= reinterpret_cast(extent), + *extent_end= block + + pages_in_extent[srv_page_size_shift - UNIV_PAGE_SIZE_SHIFT_MIN]; + block < extent_end && reinterpret_cast(block) < end; block++) + { + if (!block->page.in_file()) + continue; + switch (const lsn_t lsn= block->page.oldest_modification()) { + case 0: + case 1: + break; + + case 2: + ut_ad(fsp_is_system_temporary(block->page.id().space())); + break; + + default: + if (srv_read_only_mode) + { + /* The page cleaner is disabled in read-only mode. No pages + can be dirtied, so all of them must be clean. 
*/ + ut_ad(lsn == recv_sys.recovered_lsn || + srv_force_recovery == SRV_FORCE_NO_LOG_REDO); + break; + } + + goto fixed_or_dirty; + } + + if (!block->page.can_relocate()) + fixed_or_dirty: + ib::fatal() << "Page " << block->page.id() << " still fixed or dirty"; + } + mysql_mutex_unlock(&mutex); } #endif /* UNIV_DEBUG */ @@ -3889,40 +3830,35 @@ void buf_pool_t::validate() noexcept mysql_mutex_lock(&mutex); - chunk_t* chunk = chunks; - /* Check the uncompressed blocks. */ - for (auto i = n_chunks; i--; chunk++) { - buf_block_t* block = chunk->blocks; + for (ulint i = 0; i < n_blocks; i++) { + const buf_block_t* block = get_nth_page(i); + ut_ad(block->page.frame == block->frame_address()); - for (auto j = chunk->size; j--; block++) { - ut_ad(block->page.frame); - switch (const auto f = block->page.state()) { - case buf_page_t::NOT_USED: - n_free++; - break; - - case buf_page_t::MEMORY: - case buf_page_t::REMOVE_HASH: - /* do nothing */ + switch (const auto f = block->page.state()) { + case buf_page_t::NOT_USED: + ut_ad(!block->page.in_LRU_list); + n_free++; + break; + case buf_page_t::MEMORY: + case buf_page_t::REMOVE_HASH: + /* do nothing */ + break; + default: + if (f >= buf_page_t::READ_FIX + && f < buf_page_t::WRITE_FIX) { + /* A read-fixed block is not + necessarily in the page_hash yet. */ break; - - default: - if (f >= buf_page_t::READ_FIX - && f < buf_page_t::WRITE_FIX) { - /* A read-fixed block is not - necessarily in the page_hash yet. 
*/ - break; - } - ut_ad(f >= buf_page_t::FREED); - const page_id_t id{block->page.id()}; - ut_ad(page_hash.get( - id, - page_hash.cell_get(id.fold())) - == &block->page); - n_lru++; } + ut_ad(f >= buf_page_t::FREED); + const page_id_t id{block->page.id()}; + ut_ad(page_hash.get( + id, + page_hash.cell_get(id.fold())) + == &block->page); + n_lru++; } } @@ -3947,24 +3883,11 @@ void buf_pool_t::validate() noexcept ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing); mysql_mutex_unlock(&flush_list_mutex); - - if (n_chunks_new == n_chunks - && n_lru + n_free > curr_size + n_zip) { - - ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free - << ", pool " << curr_size - << " zip " << n_zip << ". Aborting..."; - } - + ut_ad(n_lru + n_free <= n_blocks + n_zip); ut_ad(UT_LIST_GET_LEN(LRU) >= n_lru); - - if (n_chunks_new == n_chunks - && UT_LIST_GET_LEN(free) != n_free) { - - ib::fatal() << "Free list len " - << UT_LIST_GET_LEN(free) - << ", free blocks " << n_free << ". Aborting..."; - } + ut_ad(UT_LIST_GET_LEN(free) <= n_free); + ut_ad(size_in_bytes != size_in_bytes_requested + || UT_LIST_GET_LEN(free) == n_free); mysql_mutex_unlock(&mutex); @@ -3979,26 +3902,23 @@ void buf_pool_t::print() noexcept { index_id_t* index_ids; ulint* counts; - ulint size; ulint i; - ulint j; index_id_t id; ulint n_found; - chunk_t* chunk; dict_index_t* index; - size = curr_size; + mysql_mutex_lock(&mutex); index_ids = static_cast( - ut_malloc_nokey(size * sizeof *index_ids)); + ut_malloc_nokey(n_blocks * sizeof *index_ids)); - counts = static_cast(ut_malloc_nokey(sizeof(ulint) * size)); + counts = static_cast( + ut_malloc_nokey(sizeof(ulint) * n_blocks)); - mysql_mutex_lock(&mutex); mysql_mutex_lock(&flush_list_mutex); ib::info() - << "[buffer pool: size=" << curr_size + << "[buffer pool: size=" << n_blocks << ", database pages=" << UT_LIST_GET_LEN(LRU) << ", free pages=" << UT_LIST_GET_LEN(free) << ", modified database pages=" @@ -4018,38 +3938,28 @@ void buf_pool_t::print() noexcept n_found = 
0; - chunk = chunks; - - for (i = n_chunks; i--; chunk++) { - buf_block_t* block = chunk->blocks; - ulint n_blocks = chunk->size; - - for (; n_blocks--; block++) { - const buf_frame_t* frame = block->page.frame; + for (size_t i = 0; i < n_blocks; i++) { + buf_block_t* block = get_nth_page(i); + const buf_frame_t* frame = block->page.frame; + ut_ad(frame == block->frame_address()); - if (fil_page_index_page_check(frame)) { + if (fil_page_index_page_check(frame)) { - id = btr_page_get_index_id(frame); + id = btr_page_get_index_id(frame); - /* Look for the id in the index_ids array */ - j = 0; - - while (j < n_found) { - - if (index_ids[j] == id) { - counts[j]++; - - break; - } - j++; - } - - if (j == n_found) { - n_found++; - index_ids[j] = id; - counts[j] = 1; + /* Look for the id in the index_ids array */ + for (ulint j = 0; j < n_found; j++) { + if (index_ids[j] == id) { + counts[j]++; + goto found; } } + + index_ids[n_found] = id; + counts[n_found] = 1; + n_found++; +found: + continue; } } @@ -4083,138 +3993,78 @@ ulint buf_get_latched_pages_number() noexcept { ulint fixed_pages_number= 0; - mysql_mutex_lock(&buf_pool.mutex); + mysql_mutex_assert_owner(&buf_pool.mutex); for (buf_page_t *b= UT_LIST_GET_FIRST(buf_pool.LRU); b; b= UT_LIST_GET_NEXT(LRU, b)) if (b->state() > buf_page_t::UNFIXED) fixed_pages_number++; - mysql_mutex_unlock(&buf_pool.mutex); - return fixed_pages_number; } #endif /* UNIV_DEBUG */ -/** Collect buffer pool metadata. 
-@param[out] pool_info buffer pool metadata */ -void buf_stats_get_pool_info(buf_pool_info_t *pool_info) noexcept +void buf_pool_t::get_info(buf_pool_info_t *pool_info) noexcept { - time_t current_time; - double time_elapsed; - - mysql_mutex_lock(&buf_pool.mutex); - - pool_info->pool_size = buf_pool.curr_size; - - pool_info->lru_len = UT_LIST_GET_LEN(buf_pool.LRU); - - pool_info->old_lru_len = buf_pool.LRU_old_len; - - pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool.free); - - mysql_mutex_lock(&buf_pool.flush_list_mutex); - pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool.flush_list); - - pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool.unzip_LRU); - - pool_info->n_pend_reads = os_aio_pending_reads_approx(); - - pool_info->n_pending_flush_lru = buf_pool.n_flush(); - - pool_info->n_pending_flush_list = os_aio_pending_writes(); - mysql_mutex_unlock(&buf_pool.flush_list_mutex); - - current_time = time(NULL); - time_elapsed = 0.001 + difftime(current_time, - buf_pool.last_printout_time); - - pool_info->n_pages_made_young = buf_pool.stat.n_pages_made_young; - - pool_info->n_pages_not_made_young = - buf_pool.stat.n_pages_not_made_young; - - pool_info->n_pages_read = buf_pool.stat.n_pages_read; - - pool_info->n_pages_created = buf_pool.stat.n_pages_created; - - pool_info->n_pages_written = buf_pool.stat.n_pages_written; - - pool_info->n_page_gets = buf_pool.stat.n_page_gets; - - pool_info->n_ra_pages_read_rnd = buf_pool.stat.n_ra_pages_read_rnd; - pool_info->n_ra_pages_read = buf_pool.stat.n_ra_pages_read; - - pool_info->n_ra_pages_evicted = buf_pool.stat.n_ra_pages_evicted; - - pool_info->page_made_young_rate = - static_cast(buf_pool.stat.n_pages_made_young - - buf_pool.old_stat.n_pages_made_young) - / time_elapsed; - - pool_info->page_not_made_young_rate = - static_cast(buf_pool.stat.n_pages_not_made_young - - buf_pool.old_stat.n_pages_not_made_young) - / time_elapsed; - - pool_info->pages_read_rate = - static_cast(buf_pool.stat.n_pages_read - - 
buf_pool.old_stat.n_pages_read) - / time_elapsed; - - pool_info->pages_created_rate = - static_cast(buf_pool.stat.n_pages_created - - buf_pool.old_stat.n_pages_created) - / time_elapsed; - - pool_info->pages_written_rate = - static_cast(buf_pool.stat.n_pages_written - - buf_pool.old_stat.n_pages_written) - / time_elapsed; - - pool_info->n_page_get_delta = buf_pool.stat.n_page_gets - - buf_pool.old_stat.n_page_gets; - - if (pool_info->n_page_get_delta) { - pool_info->page_read_delta = buf_pool.stat.n_pages_read - - buf_pool.old_stat.n_pages_read; - - pool_info->young_making_delta = - buf_pool.stat.n_pages_made_young - - buf_pool.old_stat.n_pages_made_young; - - pool_info->not_young_making_delta = - buf_pool.stat.n_pages_not_made_young - - buf_pool.old_stat.n_pages_not_made_young; - } - pool_info->pages_readahead_rnd_rate = - static_cast(buf_pool.stat.n_ra_pages_read_rnd - - buf_pool.old_stat.n_ra_pages_read_rnd) - / time_elapsed; - - - pool_info->pages_readahead_rate = - static_cast(buf_pool.stat.n_ra_pages_read - - buf_pool.old_stat.n_ra_pages_read) - / time_elapsed; - - pool_info->pages_evicted_rate = - static_cast(buf_pool.stat.n_ra_pages_evicted - - buf_pool.old_stat.n_ra_pages_evicted) - / time_elapsed; - - pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool.unzip_LRU); - - pool_info->io_sum = buf_LRU_stat_sum.io; - - pool_info->io_cur = buf_LRU_stat_cur.io; - - pool_info->unzip_sum = buf_LRU_stat_sum.unzip; + mysql_mutex_lock(&mutex); + pool_info->pool_size= curr_size(); + pool_info->lru_len= UT_LIST_GET_LEN(LRU); + pool_info->old_lru_len= LRU_old_len; + pool_info->free_list_len= UT_LIST_GET_LEN(free); - pool_info->unzip_cur = buf_LRU_stat_cur.unzip; + mysql_mutex_lock(&flush_list_mutex); + pool_info->flush_list_len= UT_LIST_GET_LEN(flush_list); + pool_info->n_pend_unzip= UT_LIST_GET_LEN(unzip_LRU); + pool_info->n_pend_reads= os_aio_pending_reads_approx(); + pool_info->n_pending_flush_lru= n_flush(); + pool_info->n_pending_flush_list= os_aio_pending_writes(); 
+ mysql_mutex_unlock(&flush_list_mutex); - buf_refresh_io_stats(); - mysql_mutex_unlock(&buf_pool.mutex); + double elapsed= 0.001 + difftime(time(nullptr), last_printout_time); + + pool_info->n_pages_made_young= stat.n_pages_made_young; + pool_info->page_made_young_rate= + double(stat.n_pages_made_young - old_stat.n_pages_made_young) / + elapsed; + pool_info->n_pages_not_made_young= stat.n_pages_not_made_young; + pool_info->page_not_made_young_rate= + double(stat.n_pages_not_made_young - old_stat.n_pages_not_made_young) / + elapsed; + pool_info->n_pages_read= stat.n_pages_read; + pool_info->pages_read_rate= + double(stat.n_pages_read - old_stat.n_pages_read) / elapsed; + pool_info->n_pages_created= stat.n_pages_created; + pool_info->pages_created_rate= + double(stat.n_pages_created - old_stat.n_pages_created) / elapsed; + pool_info->n_pages_written= stat.n_pages_written; + pool_info->pages_written_rate= + double(stat.n_pages_written - old_stat.n_pages_written) / elapsed; + pool_info->n_page_gets= stat.n_page_gets; + pool_info->n_page_get_delta= stat.n_page_gets - old_stat.n_page_gets; + if (pool_info->n_page_get_delta) + { + pool_info->page_read_delta= stat.n_pages_read - old_stat.n_pages_read; + pool_info->young_making_delta= + stat.n_pages_made_young - old_stat.n_pages_made_young; + pool_info->not_young_making_delta= + stat.n_pages_not_made_young - old_stat.n_pages_not_made_young; + } + pool_info->n_ra_pages_read_rnd= stat.n_ra_pages_read_rnd; + pool_info->pages_readahead_rnd_rate= + double(stat.n_ra_pages_read_rnd - old_stat.n_ra_pages_read_rnd) / elapsed; + pool_info->n_ra_pages_read= stat.n_ra_pages_read; + pool_info->pages_readahead_rate= + double(stat.n_ra_pages_read - old_stat.n_ra_pages_read) / elapsed; + pool_info->n_ra_pages_evicted= stat.n_ra_pages_evicted; + pool_info->pages_evicted_rate= + double(stat.n_ra_pages_evicted - old_stat.n_ra_pages_evicted) / elapsed; + pool_info->unzip_lru_len= UT_LIST_GET_LEN(unzip_LRU); + pool_info->io_sum= 
buf_LRU_stat_sum.io; + pool_info->io_cur= buf_LRU_stat_cur.io; + pool_info->unzip_sum= buf_LRU_stat_sum.unzip; + pool_info->unzip_cur= buf_LRU_stat_cur.unzip; + buf_refresh_io_stats(); + mysql_mutex_unlock(&mutex); } /*********************************************************************//** @@ -4322,7 +4172,7 @@ buf_print_io( { buf_pool_info_t pool_info; - buf_stats_get_pool_info(&pool_info); + buf_pool.get_info(&pool_info); buf_print_io_instance(&pool_info, file); } diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc index faf87165c2eb7..6a9cd99b0e120 100644 --- a/storage/innobase/buf/buf0dump.cc +++ b/storage/innobase/buf/buf0dump.cc @@ -58,7 +58,7 @@ take after being waked up. */ static volatile bool buf_dump_should_start; static volatile bool buf_load_should_start; -static bool buf_load_abort_flag; +static Atomic_relaxed buf_load_abort_flag; /** Start the buffer pool dump/load task and instructs it to start a dump. */ void buf_dump_start() @@ -295,7 +295,7 @@ buf_dump( /* limit the number of total pages dumped to X% of the total number of pages */ - t_pages = buf_pool.curr_size * srv_buf_pool_dump_pct / 100; + t_pages = buf_pool.curr_size() * srv_buf_pool_dump_pct / 100; if (n_pages > t_pages) { buf_dump_status(STATUS_INFO, "Restricted to " ULINTPF @@ -475,10 +475,10 @@ buf_load() return; } - /* If dump is larger than the buffer pool(s), then we ignore the + /* If the dump is larger than the buffer pool, then we ignore the extra trailing. This could happen if a dump is made, then buffer pool is shrunk and then load is attempted. 
*/ - dump_n = std::min(dump_n, buf_pool.get_n_pages()); + dump_n = std::min(dump_n, buf_pool.curr_size()); if (dump_n != 0) { dump = static_cast(ut_malloc_nokey( diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index b0ee82e03e06b..2d6fc150999e7 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -187,7 +187,7 @@ void buf_pool_t::insert_into_flush_list(buf_block_t *block, lsn_t lsn) noexcept } else flush_list_bytes+= block->physical_size(); - ut_ad(flush_list_bytes <= curr_pool_size); + ut_ad(flush_list_bytes <= size_in_bytes); block->page.set_oldest_modification(lsn); MEM_CHECK_DEFINED(block->page.zip.data @@ -318,6 +318,8 @@ void buf_page_t::write_complete(bool persistent, bool error, uint32_t state) { ut_ad(!persistent == fsp_is_system_temporary(id().space())); ut_ad(state >= WRITE_FIX); + ut_ad(!frame || + frame == reinterpret_cast(this)->frame_address()); if (UNIV_LIKELY(!error)) { @@ -833,6 +835,7 @@ bool buf_page_t::flush(fil_space_t *space) noexcept size_t orig_size; #endif buf_tmp_buffer_t *slot= nullptr; + byte *page= frame; if (UNIV_UNLIKELY(!frame)) /* ROW_FORMAT=COMPRESSED */ { @@ -848,7 +851,6 @@ bool buf_page_t::flush(fil_space_t *space) noexcept } else { - byte *page= frame; size= block->physical_size(); #if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32 orig_size= size; @@ -929,7 +931,7 @@ static page_id_t buf_flush_check_neighbors(const fil_space_t &space, : space.physical_size() == 1024 ? 3 : 0)); /* When flushed, dirty blocks are searched in neighborhoods of this size, and flushed along with the original page. */ - const ulint s= buf_pool.curr_size / 16; + const ulint s= buf_pool.curr_size() / 16; const uint32_t read_ahead= buf_pool.read_ahead_area; const uint32_t buf_flush_area= read_ahead > s ? 
static_cast(s) : read_ahead; @@ -1250,18 +1252,34 @@ static void buf_flush_discard_page(buf_page_t *bpage) noexcept buf_LRU_free_page(bpage, true); } +/** Adjust to_withdraw during buf_pool_t::shrink() */ +ATTRIBUTE_COLD static size_t buf_flush_LRU_to_withdraw(size_t to_withdraw, + const buf_page_t &bpage) + noexcept +{ + mysql_mutex_assert_owner(&buf_pool.mutex); + if (!buf_pool.is_shrinking()) + return 0; + const size_t size{buf_pool.size_in_bytes_requested}; + if (buf_pool.will_be_withdrawn(bpage.frame, size) || + buf_pool.will_be_withdrawn(bpage.zip.data, size)) + to_withdraw--; + return to_withdraw; +} + /** Flush dirty blocks from the end buf_pool.LRU, and move clean blocks to buf_pool.free. -@param max maximum number of blocks to flush -@param n counts of flushed and evicted pages */ -static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept +@param max maximum number of blocks to flush +@param n counts of flushed and evicted pages +@param to_withdraw buf_pool.to_withdraw() */ +static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n, + size_t to_withdraw) noexcept { - ulint scanned= 0; + size_t scanned= 0; mysql_mutex_assert_owner(&buf_pool.mutex); - ulint free_limit{buf_pool.LRU_scan_depth}; - if (buf_pool.withdraw_target && buf_pool.is_shrinking()) - free_limit+= buf_pool.withdraw_target - UT_LIST_GET_LEN(buf_pool.withdraw); - + size_t free_limit{buf_pool.LRU_scan_depth}; + if (UNIV_UNLIKELY(to_withdraw > free_limit)) + to_withdraw= free_limit; const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN ? 0 : buf_pool.flush_neighbors; fil_space_t *space= nullptr; @@ -1271,20 +1289,21 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept /* BUF_LRU_MIN_LEN (256) is too high value for low buffer pool(BP) size. For example, for BP size lower than 80M and 16 K page size, the limit is more than - 5% of total BP and for lowest BP 5M, it is 80% of the BP. 
Non-data objects + 5% of total BP and for lowest BP 6M, it is 80% of the BP. Non-data objects like explicit locks could occupy part of the BP pool reducing the pages available for LRU. If LRU reaches minimum limit and if no free pages are available, server would hang with page cleaner not able to free any more pages. To avoid such hang, we adjust the LRU limit lower than the limit for data objects as checked in buf_LRU_check_size_of_non_data_objects() i.e. one page less than 5% of BP. */ - size_t pool_limit= buf_pool.curr_size / 20 - 1; - auto buf_lru_min_len= std::min(pool_limit, BUF_LRU_MIN_LEN); + const size_t buf_lru_min_len= + std::min((buf_pool.usable_size()) / 20 - 1, size_t{BUF_LRU_MIN_LEN}); for (buf_page_t *bpage= UT_LIST_GET_LAST(buf_pool.LRU); bpage && ((UT_LIST_GET_LEN(buf_pool.LRU) > buf_lru_min_len && UT_LIST_GET_LEN(buf_pool.free) < free_limit) || + to_withdraw || recv_recovery_is_on()); ++scanned, bpage= buf_pool.lru_hp.get()) { @@ -1300,6 +1319,8 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept if (state != buf_page_t::FREED && (state >= buf_page_t::READ_FIX || (~buf_page_t::LRU_MASK & state))) continue; + if (UNIV_UNLIKELY(to_withdraw != 0)) + to_withdraw= buf_flush_LRU_to_withdraw(to_withdraw, *bpage); buf_LRU_free_page(bpage, true); ++n->evicted; if (UNIV_LIKELY(scanned & 31)) @@ -1371,23 +1392,32 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept continue; } + if (state < buf_page_t::UNFIXED) + goto flush; + if (n->flushed >= max && !recv_recovery_is_on()) { bpage->lock.u_unlock(true); break; } - if (neighbors && space->is_rotational() && + if (neighbors && space->is_rotational() && UNIV_LIKELY(!to_withdraw) && /* Skip neighbourhood flush from LRU list if we haven't yet reached half of the free page target. 
*/ UT_LIST_GET_LEN(buf_pool.free) * 2 >= free_limit) n->flushed+= buf_flush_try_neighbors(space, page_id, bpage, neighbors == 1, n->flushed, max); - else if (bpage->flush(space)) - ++n->flushed; else - continue; + { + flush: + if (UNIV_UNLIKELY(to_withdraw != 0)) + to_withdraw= buf_flush_LRU_to_withdraw(to_withdraw, *bpage); + if (bpage->flush(space)) + ++n->flushed; + else + continue; + } goto reacquire_mutex; } @@ -1416,11 +1446,12 @@ Whether LRU or unzip_LRU is used depends on the state of the system. @param n counts of flushed and evicted pages */ static void buf_do_LRU_batch(ulint max, flush_counters_t *n) noexcept { - if (buf_LRU_evict_from_unzip_LRU()) + const size_t to_withdraw= buf_pool.to_withdraw(); + if (!to_withdraw && buf_LRU_evict_from_unzip_LRU()) buf_free_from_unzip_LRU_list_batch(); n->evicted= 0; n->flushed= 0; - buf_flush_LRU_list_batch(max, n); + buf_flush_LRU_list_batch(max, n, to_withdraw); mysql_mutex_assert_owner(&buf_pool.mutex); buf_lru_freed_page_count+= n->evicted; @@ -1784,7 +1815,7 @@ static ulint buf_flush_LRU(ulint max_n) noexcept pthread_cond_broadcast(&buf_pool.done_free); } else if (!pages && !buf_pool.try_LRU_scan) - /* For example, with the minimum innodb_buffer_pool_size=5M and + /* For example, with the minimum innodb_buffer_pool_size=6M and the default innodb_page_size=16k there are only a little over 316 pages in the buffer pool. The buffer pool can easily be exhausted by a workload of some dozen concurrent connections. 
The system could @@ -2097,6 +2128,8 @@ static void buf_flush_sync_for_checkpoint(lsn_t lsn) noexcept MONITOR_FLUSH_SYNC_PAGES, n_flushed); } + os_aio_wait_until_no_pending_writes(false); + switch (srv_file_flush_method) { case SRV_NOSYNC: case SRV_O_DIRECT_NO_FSYNC: @@ -2320,6 +2353,13 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in, goto func_exit; } +TPOOL_SUPPRESS_TSAN +bool buf_pool_t::running_out() const noexcept +{ + return !recv_recovery_is_on() && + UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < n_blocks / 4; +} + TPOOL_SUPPRESS_TSAN bool buf_pool_t::need_LRU_eviction() const noexcept { @@ -2380,7 +2420,7 @@ static void buf_flush_page_cleaner() noexcept (!UT_LIST_GET_LEN(buf_pool.flush_list) || srv_max_dirty_pages_pct_lwm == 0.0)) { - buf_pool.LRU_warned.clear(std::memory_order_release); + buf_pool.LRU_warned_clear(); /* We are idle; wait for buf_pool.page_cleaner_wakeup() */ my_cond_wait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex); @@ -2455,6 +2495,7 @@ static void buf_flush_page_cleaner() noexcept buf_pool.n_flush_inc(); mysql_mutex_unlock(&buf_pool.flush_list_mutex); n= srv_max_io_capacity; + os_aio_wait_until_no_pending_writes(false); mysql_mutex_lock(&buf_pool.mutex); LRU_flush: n= buf_flush_LRU(n); @@ -2558,7 +2599,10 @@ static void buf_flush_page_cleaner() noexcept !buf_pool.need_LRU_eviction()) goto check_oldest_and_set_idle; else + { mysql_mutex_lock(&buf_pool.mutex); + os_aio_wait_until_no_pending_writes(false); + } n= srv_max_io_capacity; n= n >= n_flushed ? n - n_flushed : 0; @@ -2603,11 +2647,14 @@ ATTRIBUTE_COLD void buf_pool_t::LRU_warn() noexcept { mysql_mutex_assert_owner(&mutex); try_LRU_scan= false; - if (!LRU_warned.test_and_set(std::memory_order_acquire)) + if (!LRU_warned) { + LRU_warned= true; sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" - " Consider increasing innodb_buffer_pool_size."); - buf_pool.print_flush_info(); + " %zu blocks are in use and %zu free." 
+ " Consider increasing innodb_buffer_pool_size.", + UT_LIST_GET_LEN(LRU), UT_LIST_GET_LEN(free)); + print_flush_info(); } } diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index 2819943ebb913..0f7df439f4264 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -38,6 +38,7 @@ Created 11/5/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0mon.h" #include "my_cpu.h" +#include "log.h" /** The number of blocks from the LRU_old pointer onward, including the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV @@ -133,7 +134,7 @@ static inline void incr_LRU_size_in_bytes(const buf_page_t* bpage) buf_pool.stat.LRU_bytes += bpage->physical_size(); - ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size); + ut_ad(buf_pool.stat.LRU_bytes <= buf_pool.curr_pool_size()); } /** @return whether the unzip_LRU list should be used for evicting a victim @@ -259,89 +260,55 @@ static bool buf_LRU_free_from_common_LRU_list(ulint limit) return(freed); } -/** @return a buffer block from the buf_pool.free list -@retval NULL if the free list is empty */ -buf_block_t* buf_LRU_get_free_only() -{ - buf_block_t* block; - - mysql_mutex_assert_owner(&buf_pool.mutex); - - block = reinterpret_cast( - UT_LIST_GET_FIRST(buf_pool.free)); - - while (block != NULL) { - ut_ad(block->page.in_free_list); - ut_d(block->page.in_free_list = FALSE); - ut_ad(!block->page.oldest_modification()); - ut_ad(!block->page.in_LRU_list); - ut_a(!block->page.in_file()); - UT_LIST_REMOVE(buf_pool.free, &block->page); - - if (!buf_pool.is_shrinking() - || UT_LIST_GET_LEN(buf_pool.withdraw) - >= buf_pool.withdraw_target - || !buf_pool.will_be_withdrawn(block->page)) { - /* No adaptive hash index entries may point to - a free block. 
*/ - assert_block_ahi_empty(block); - - block->page.set_state(buf_page_t::MEMORY); - MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); - break; - } - - /* This should be withdrawn */ - UT_LIST_ADD_LAST(buf_pool.withdraw, &block->page); - ut_d(block->in_withdraw_list = true); - - block = reinterpret_cast( - UT_LIST_GET_FIRST(buf_pool.free)); - } - - return(block); -} - /******************************************************************//** Checks how much of buf_pool is occupied by non-data objects like AHI, lock heaps etc. Depending on the size of non-data objects this function will either assert or issue a warning and switch on the status monitor. */ -static void buf_LRU_check_size_of_non_data_objects() +static void buf_LRU_check_size_of_non_data_objects() noexcept { mysql_mutex_assert_owner(&buf_pool.mutex); - if (recv_recovery_is_on() || buf_pool.n_chunks_new != buf_pool.n_chunks) + if (recv_recovery_is_on()) return; - const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU); + const size_t curr_size{buf_pool.usable_size()}; - if (s < buf_pool.curr_size / 20) - ib::fatal() << "Over 95 percent of the buffer pool is" - " occupied by lock heaps" + auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU); + + if (s >= curr_size / 20); + else if (buf_pool.is_shrinking()) + buf_pool.LRU_warn(); + else + { + sql_print_error("[FATAL] InnoDB: Over 95 percent of the buffer pool is" + " occupied by lock heaps" #ifdef BTR_CUR_HASH_ADAPT - " or the adaptive hash index" + " or the adaptive hash index" #endif /* BTR_CUR_HASH_ADAPT */ - "! Check that your transactions do not set too many" - " row locks, or review if innodb_buffer_pool_size=" - << (buf_pool.curr_size >> (20U - srv_page_size_shift)) - << "M could be bigger."; + "! 
Check that your transactions do not set too many" + " row locks, or review if innodb_buffer_pool_size=%zuM" + " could be bigger", + curr_size >> (20 - srv_page_size_shift)); + abort(); + } - if (s < buf_pool.curr_size / 3) + if (s < curr_size / 3) { if (!buf_lru_switched_on_innodb_mon && srv_monitor_timer) { /* Over 67 % of the buffer pool is occupied by lock heaps or the adaptive hash index. This may be a memory leak! */ - ib::warn() << "Over 67 percent of the buffer pool is" - " occupied by lock heaps" + sql_print_warning("InnoDB: Over 67 percent of the buffer pool is" + " occupied by lock heaps" #ifdef BTR_CUR_HASH_ADAPT - " or the adaptive hash index" + " or the adaptive hash index" #endif /* BTR_CUR_HASH_ADAPT */ - "! Check that your transactions do not set too many row locks." - " innodb_buffer_pool_size=" - << (buf_pool.curr_size >> (20U - srv_page_size_shift)) - << "M. Starting the InnoDB Monitor to print diagnostics."; + "! Check that your transactions do not set too many" + " row locks. innodb_buffer_pool_size=%zuM." 
+ " Starting the InnoDB Monitor to print diagnostics.", + curr_size >> (20 - srv_page_size_shift)); + buf_lru_switched_on_innodb_mon= true; srv_print_innodb_monitor= TRUE; srv_monitor_timer_schedule_now(); @@ -389,15 +356,15 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) retry: /* If there is a block in the free list, take it */ - block= buf_LRU_get_free_only(); + block= buf_pool.allocate(); if (block) { got_block: const ulint LRU_size= UT_LIST_GET_LEN(buf_pool.LRU); const ulint available= UT_LIST_GET_LEN(buf_pool.free); - const ulint scan_depth= buf_pool.LRU_scan_depth / 2; - ut_ad(LRU_size <= BUF_LRU_MIN_LEN || - available >= scan_depth || buf_pool.need_LRU_eviction()); + const size_t scan_depth{buf_pool.LRU_scan_depth / 2}; + ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth || + buf_pool.is_shrinking() || buf_pool.need_LRU_eviction()); ut_d(bool signalled = false); @@ -446,7 +413,7 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) waited= true; - while (!(block= buf_LRU_get_free_only())) + while (!(block= buf_pool.allocate())) { buf_pool.stat.LRU_waits++; @@ -811,10 +778,10 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) if (zip || !bpage->zip.data || !bpage->frame) { break; } + mysql_mutex_lock(&buf_pool.flush_list_mutex); relocate_compressed: b = static_cast(ut_zalloc_nokey(sizeof *b)); ut_a(b); - mysql_mutex_lock(&buf_pool.flush_list_mutex); new (b) buf_page_t(*bpage); b->frame = nullptr; { @@ -833,7 +800,12 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) hash_lock.unlock(); return(false); } - goto relocate_compressed; + mysql_mutex_lock(&buf_pool.flush_list_mutex); + if (bpage->can_relocate()) { + goto relocate_compressed; + } + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + goto func_exit; } mysql_mutex_assert_owner(&buf_pool.mutex); @@ -872,7 +844,6 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) /* The fields of bpage were copied to b before buf_LRU_block_remove_hashed() was invoked. 
*/ - ut_ad(!b->in_zip_hash); ut_ad(b->in_LRU_list); ut_ad(b->in_page_hash); ut_d(b->in_page_hash = false); @@ -995,24 +966,12 @@ buf_LRU_block_free_non_file_page( if (data != NULL) { block->page.zip.data = NULL; - buf_pool_mutex_exit_forbid(); - ut_ad(block->zip_size()); - buf_buddy_free(data, block->zip_size()); - - buf_pool_mutex_exit_allow(); page_zip_set_size(&block->page.zip, 0); } - if (buf_pool.is_shrinking() - && UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target - && buf_pool.will_be_withdrawn(block->page)) { - /* This should be withdrawn */ - UT_LIST_ADD_LAST( - buf_pool.withdraw, - &block->page); - ut_d(block->in_withdraw_list = true); + if (buf_pool.to_withdraw() && buf_pool.withdraw(block->page)) { } else { UT_LIST_ADD_FIRST(buf_pool.free, &block->page); ut_d(block->page.in_free_list = true); @@ -1113,7 +1072,6 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, MEM_CHECK_ADDRESSABLE(bpage->zip.data, bpage->zip_size()); } - ut_ad(!bpage->in_zip_hash); buf_pool.page_hash.remove(chain, bpage); page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain); @@ -1125,11 +1083,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, ut_ad(!bpage->oldest_modification()); hash_lock.unlock(); - buf_pool_mutex_exit_forbid(); - buf_buddy_free(bpage->zip.data, bpage->zip_size()); - - buf_pool_mutex_exit_allow(); bpage->lock.free(); ut_free(bpage); return false; @@ -1158,12 +1112,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id, ut_ad(!bpage->in_free_list); ut_ad(!bpage->oldest_modification()); ut_ad(!bpage->in_LRU_list); - buf_pool_mutex_exit_forbid(); - buf_buddy_free(data, bpage->zip_size()); - - buf_pool_mutex_exit_allow(); - page_zip_set_size(&bpage->zip, 0); } @@ -1334,7 +1283,7 @@ void buf_LRU_validate() ut_ad(!bpage->frame || reinterpret_cast(bpage) ->in_unzip_LRU_list - == bpage->belongs_to_unzip_LRU()); + == !!bpage->zip.data); if (bpage->is_old()) { 
const buf_page_t* prev diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index c0e69b42c4001..54ff4abcb2b0a 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -44,7 +44,7 @@ Created 11/5/1995 Heikki Tuuri #include "log.h" #include "mariadb_stats.h" -/** If there are buf_pool.curr_size per the number below pending reads, then +/** If there are buf_pool.curr_size() per the number below pending reads, then read-ahead is not done: this is to prevent flooding the buffer pool with i/o-fixed buffer blocks */ #define BUF_READ_AHEAD_PEND_LIMIT 2 @@ -63,7 +63,6 @@ inline uint32_t buf_pool_t::watch_remove(buf_page_t *w, ut_ad(xtest() || page_hash.lock_get(chain).is_write_locked()); ut_ad(w >= &watch[0]); ut_ad(w < &watch[array_elements(watch)]); - ut_ad(!w->in_zip_hash); ut_ad(!w->zip.data); uint32_t s{w->state()}; @@ -371,7 +370,7 @@ ulint buf_read_ahead_random(const page_id_t page_id, bool ibuf) noexcept return 0; if (os_aio_pending_reads_approx() > - buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) + buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT) return 0; fil_space_t* space= fil_space_t::get(page_id.space()); @@ -524,7 +523,7 @@ ulint buf_read_ahead_linear(const page_id_t page_id, bool ibuf) noexcept return 0; if (os_aio_pending_reads_approx() > - buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT) + buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT) return 0; const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index bced539a0eb7c..ab40a6265fea4 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -992,7 +992,7 @@ void dict_sys_t::create() noexcept UT_LIST_INIT(table_LRU, &dict_table_t::table_LRU); UT_LIST_INIT(table_non_LRU, &dict_table_t::table_LRU); - const ulint hash_size = buf_pool_get_curr_size() + const ulint hash_size = buf_pool.curr_pool_size() / (DICT_POOL_PER_TABLE_HASH 
* UNIV_WORD_SIZE); table_hash.create(hash_size); @@ -4440,7 +4440,7 @@ void dict_sys_t::resize() noexcept table_id_hash.free(); temp_id_hash.free(); - const ulint hash_size = buf_pool_get_curr_size() + const ulint hash_size = buf_pool.curr_pool_size() / (DICT_POOL_PER_TABLE_HASH * UNIV_WORD_SIZE); table_hash.create(hash_size); table_id_hash.create(hash_size); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 50cfc5570d227..903259bd86e44 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -45,6 +45,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include #include #include +#include #include #include #include @@ -160,8 +161,6 @@ static const long AUTOINC_NO_LOCKING = 2; static ulong innobase_open_files; static long innobase_autoinc_lock_mode; -ulonglong innobase_buffer_pool_size; - /** Percentage of the buffer pool to reserve for 'old' blocks. Connected to buf_LRU_old_ratio. */ static uint innobase_old_blocks_pct; @@ -3743,58 +3742,44 @@ static int innodb_init_abort() DBUG_RETURN(1); } -/** Return the minimum buffer pool size based on page size */ -static inline ulint min_buffer_pool_size() +static void innodb_buffer_pool_size_update(THD* thd,st_mysql_sys_var*,void*, + const void *save) noexcept { - ulint s= (BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4) * srv_page_size; - /* buf_pool_chunk_size minimum is 1M, so round up to a multiple */ - ulint alignment= 1U << 20; - return UT_CALC_ALIGN(s, alignment); + buf_pool.resize(*static_cast(save), thd); } -/** Validate the requested buffer pool size. Also, reserve the necessary -memory needed for buffer pool resize. -@param[in] thd thread handle -@param[in] var pointer to system variable -@param[out] save immediate result for update function -@param[in] value incoming string -@return 0 on success, 1 on failure. 
-*/ -static -int -innodb_buffer_pool_size_validate( - THD* thd, - struct st_mysql_sys_var* var, - void* save, - struct st_mysql_value* value); - -/** Update the system variable innodb_buffer_pool_size using the "saved" -value. This function is registered as a callback with MySQL. -@param[in] thd thread handle -@param[in] var pointer to system variable -@param[out] var_ptr where the formal string goes -@param[in] save immediate result from check function */ -static -void -innodb_buffer_pool_size_update( - THD* thd, - struct st_mysql_sys_var* var, - void* var_ptr, - const void* save); - -/* If the default value of innodb_buffer_pool_size is increased to be more than -BUF_POOL_SIZE_THRESHOLD (srv/srv0start.cc), then srv_buf_pool_instances_default -can be removed and 8 used instead. The problem with the current setup is that -with 128MiB default buffer pool size and 8 instances by default we would emit -a warning when no options are specified. */ -static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size, +static MYSQL_SYSVAR_SIZE_T(buffer_pool_size, buf_pool.size_in_bytes_requested, PLUGIN_VAR_RQCMDARG, - "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - innodb_buffer_pool_size_validate, - innodb_buffer_pool_size_update, - 128ULL << 20, - 2ULL << 20, - LLONG_MAX, 1024*1024L); + "The size of the memory buffer InnoDB uses to cache data" + " and indexes of its tables.", + nullptr, innodb_buffer_pool_size_update, 128U << 20, 2U << 20, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), 1U << 20); + +#if SIZEOF_SIZE_T < 8 || defined _AIX || defined HAVE_valgrind +/* In constrained environments, innodb_buffer_pool_size_max +will default to the initial innodb_buffer_pool_size, that is, +by default, it will not be possible to increase innodb_buffer_pool_size. 
+ +In MemorySanitizer and possibly Valgrind memcheck, any virtual memory +allocation would be backed by one or more copies of shadow bits of the +same size that could be allocated and initialized even for dummy +mappings created by mmap(2) with PROT_NONE. We do not want significant +overhead beyond the actual innodb_buffer_pool_size. */ +static constexpr size_t innodb_buffer_pool_size_max_default{0}, + innodb_buffer_pool_size_max_minimum{0}; +#else +static constexpr size_t innodb_buffer_pool_size_max_default{8ULL << 40},// 8TiB + innodb_buffer_pool_size_max_minimum{innodb_buffer_pool_extent_size}; +#endif + +static MYSQL_SYSVAR_SIZE_T(buffer_pool_size_max, buf_pool.size_in_bytes_max, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Maximum innodb_buffer_pool_size", + nullptr, nullptr, + innodb_buffer_pool_size_max_default, + innodb_buffer_pool_size_max_minimum, + size_t(-ssize_t(innodb_buffer_pool_extent_size)), + innodb_buffer_pool_extent_size); /****************************************************************//** Gives the file extension of an InnoDB single-table tablespace. */ @@ -3830,17 +3815,33 @@ static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id) return 0; } -/** Initialize and normalize innodb_buffer_pool_size. */ -static void innodb_buffer_pool_size_init() +static bool +compression_algorithm_is_not_loaded(ulong compression_algorithm) noexcept { - if (srv_buf_pool_chunk_unit > srv_buf_pool_size) { - /* Size unit of buffer pool is larger than srv_buf_pool_size. - adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. 
*/ - srv_buf_pool_chunk_unit = ulong(srv_buf_pool_size); - } + do + { +#ifndef HAVE_LZ4 + if (compression_algorithm == PAGE_LZ4_ALGORITHM) continue; +#endif +#ifndef HAVE_LZO + if (compression_algorithm == PAGE_LZO_ALGORITHM) continue; +#endif +#ifndef HAVE_LZMA + if (compression_algorithm == PAGE_LZMA_ALGORITHM) continue; +#endif +#ifndef HAVE_BZIP2 + if (compression_algorithm == PAGE_BZIP2_ALGORITHM) continue; +#endif +#ifndef HAVE_SNAPPY + if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) continue; +#endif + return false; + } + while (false); - srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size); - innobase_buffer_pool_size = srv_buf_pool_size; + sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n", + compression_algorithm); + return true; } /** Initialize, validate and normalize the InnoDB startup parameters. @@ -3850,359 +3851,288 @@ static void innodb_buffer_pool_size_init() @retval HA_ERR_INITIALIZATION when some parameters are out of range */ static int innodb_init_params() { - DBUG_ENTER("innodb_init_params"); - - ulong num_pll_degree; - - /* Check that values don't overflow on 32-bit systems. 
*/ - if (sizeof(ulint) == 4) { - if (innobase_buffer_pool_size > UINT_MAX32) { - sql_print_error( - "innodb_buffer_pool_size can't be over 4GB" - " on 32-bit systems"); - DBUG_RETURN(HA_ERR_OUT_OF_MEM); - } - } + DBUG_ENTER("innodb_init_params"); - /* The buffer pool needs to be able to accommodate enough many - pages, even for larger pages */ - MYSQL_SYSVAR_NAME(buffer_pool_size).min_val= min_buffer_pool_size(); - - if (innobase_buffer_pool_size < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) { - ib::error() << "innodb_page_size=" - << srv_page_size << " requires " - << "innodb_buffer_pool_size >= " - << (MYSQL_SYSVAR_NAME(buffer_pool_size).min_val >> 20) - << "MiB current " << (innobase_buffer_pool_size >> 20) - << "MiB"; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + srv_page_size_shift= innodb_page_size_validate(srv_page_size); + if (!srv_page_size_shift) + { + sql_print_error("InnoDB: Invalid page size=%lu.\n", srv_page_size); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } -#ifndef HAVE_LZ4 - if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblz4 is not installed. \n", - innodb_compression_algorithm); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } -#endif + size_t &min= MYSQL_SYSVAR_NAME(buffer_pool_size).min_val; + min= ut_calc_align + (buf_pool.blocks_in_bytes(BUF_LRU_MIN_LEN + BUF_LRU_MIN_LEN / 4), + 1U << 20); + const size_t innodb_buffer_pool_size= buf_pool.size_in_bytes_requested; -#ifndef HAVE_LZO - if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblzo is not installed. 
\n", - innodb_compression_algorithm); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } -#endif + if (innodb_buffer_pool_size > buf_pool.size_in_bytes_max || + my_use_large_pages /* large_pages=ON fixes innodb_buffer_pool_size */) + buf_pool.size_in_bytes_max= ut_calc_align(innodb_buffer_pool_size, + innodb_buffer_pool_extent_size); -#ifndef HAVE_LZMA - if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: liblzma is not installed. \n", - innodb_compression_algorithm); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } -#endif + MYSQL_SYSVAR_NAME(buffer_pool_size).max_val= buf_pool.size_in_bytes_max; -#ifndef HAVE_BZIP2 - if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: libbz2 is not installed. \n", - innodb_compression_algorithm); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } -#endif + if (innodb_buffer_pool_size < min) + { + sql_print_error("InnoDB: innodb_page_size=%lu requires " + "innodb_buffer_pool_size >= %zu MiB current %zu MiB", + srv_page_size, min >> 20, innodb_buffer_pool_size >> 20); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } -#ifndef HAVE_SNAPPY - if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) { - sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n" - "InnoDB: libsnappy is not installed. 
\n", - innodb_compression_algorithm); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } -#endif + if (compression_algorithm_is_not_loaded(innodb_compression_algorithm)) + DBUG_RETURN(HA_ERR_INITIALIZATION); - if ((srv_encrypt_tables || srv_encrypt_log - || innodb_encrypt_temporary_tables) - && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) { - sql_print_error("InnoDB: cannot enable encryption, " - "encryption plugin is not available"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if ((srv_encrypt_tables || srv_encrypt_log || + innodb_encrypt_temporary_tables) && + !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) + { + sql_print_error("InnoDB: cannot enable encryption, " + "encryption plugin is not available"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } #ifdef _WIN32 - if (!is_filename_allowed(srv_buf_dump_filename, - strlen(srv_buf_dump_filename), FALSE)) { - sql_print_error("InnoDB: innodb_buffer_pool_filename" - " cannot have colon (:) in the file name."); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (!is_filename_allowed(srv_buf_dump_filename, + strlen(srv_buf_dump_filename), false)) + { + sql_print_error("InnoDB: innodb_buffer_pool_filename" + " cannot have colon (:) in the file name."); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } #endif - /* First calculate the default path for innodb_data_home_dir etc., - in case the user has not given any value. + /* First calculate the default path for innodb_data_home_dir etc., + in case the user has not given any value. - Note that when using the embedded server, the datadirectory is not - necessarily the current directory of this program. */ + Note that when using the embedded server, the datadirectory is not + necessarily the current directory of this program. */ - fil_path_to_mysql_datadir = + fil_path_to_mysql_datadir = #ifndef HAVE_REPLICATION - mysqld_embedded ? mysql_real_data_home : + mysqld_embedded ? 
mysql_real_data_home : #endif - "./"; + "./"; - /* Set InnoDB initialization parameters according to the values - read from MySQL .cnf file */ + /* Set InnoDB initialization parameters according to the values + read from MySQL .cnf file */ - /* The default dir for data files is the datadir of MySQL */ + /* The default dir for data files is the datadir of MySQL */ - srv_data_home = innobase_data_home_dir - ? innobase_data_home_dir - : const_cast(fil_path_to_mysql_datadir); + srv_data_home= innobase_data_home_dir + ? innobase_data_home_dir + : const_cast(fil_path_to_mysql_datadir); #ifdef WITH_WSREP - /* If we use the wsrep API, then we need to tell the server - the path to the data files (for passing it to the SST scripts): */ - wsrep_set_data_home_dir(srv_data_home); + /* If we use the wsrep API, then we need to tell the server + the path to the data files (for passing it to the SST scripts): */ + wsrep_set_data_home_dir(srv_data_home); #endif /* WITH_WSREP */ - /*--------------- Shared tablespaces -------------------------*/ - - /* Check that the value of system variable innodb_page_size was - set correctly. Its value was put into srv_page_size. If valid, - return the associated srv_page_size_shift. 
*/ - srv_page_size_shift = innodb_page_size_validate(srv_page_size); - if (!srv_page_size_shift) { - sql_print_error("InnoDB: Invalid page size=%lu.\n", - srv_page_size); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - srv_sys_space.set_space_id(TRX_SYS_SPACE); - - switch (srv_checksum_algorithm) { - case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: - srv_sys_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER - | FSP_FLAGS_FCRC32_PAGE_SSIZE()); - break; - default: - srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE()); - } - - srv_sys_space.set_path(srv_data_home); + /*--------------- Shared tablespaces -------------------------*/ - /* Supports raw devices */ - if (!srv_sys_space.parse_params(innobase_data_file_path, true)) { - ib::error() << "Unable to parse innodb_data_file_path=" - << innobase_data_file_path; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - srv_tmp_space.set_path(srv_data_home); - - /* Temporary tablespace is in full crc32 format. */ - srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER - | FSP_FLAGS_FCRC32_PAGE_SSIZE()); - - if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) { - ib::error() << "Unable to parse innodb_temp_data_file_path=" - << innobase_temp_data_file_path; - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - /* Perform all sanity check before we take action of deleting files*/ - if (srv_sys_space.intersection(&srv_tmp_space)) { - sql_print_error("innodb_temporary and innodb_system" - " file names seem to be the same."); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + /* Check that the value of system variable innodb_page_size was + set correctly. Its value was put into srv_page_size. If valid, + return the associated srv_page_size_shift. */ - srv_sys_space.normalize_size(); - srv_tmp_space.normalize_size(); + srv_sys_space.set_space_id(TRX_SYS_SPACE); + /* Temporary tablespace is in full crc32 format. 
*/ + srv_tmp_space.set_flags(FSP_FLAGS_FCRC32_MASK_MARKER | + FSP_FLAGS_FCRC32_PAGE_SSIZE()); - /* ------------ UNDO tablespaces files ---------------------*/ - if (!srv_undo_dir) { - srv_undo_dir = const_cast(fil_path_to_mysql_datadir); - } - - if (strchr(srv_undo_dir, ';')) { - sql_print_error("syntax error in innodb_undo_directory"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } - - /* -------------- All log files ---------------------------*/ - - /* The default dir for log files is the datadir of MySQL */ + switch (srv_checksum_algorithm) { + case SRV_CHECKSUM_ALGORITHM_FULL_CRC32: + case SRV_CHECKSUM_ALGORITHM_STRICT_FULL_CRC32: + srv_sys_space.set_flags(srv_tmp_space.flags()); + break; + default: + srv_sys_space.set_flags(FSP_FLAGS_PAGE_SSIZE()); + } - if (!srv_log_group_home_dir) { - srv_log_group_home_dir - = const_cast(fil_path_to_mysql_datadir); - } + srv_sys_space.set_path(srv_data_home); - if (strchr(srv_log_group_home_dir, ';')) { - sql_print_error("syntax error in innodb_log_group_home_dir"); - DBUG_RETURN(HA_ERR_INITIALIZATION); - } + if (!srv_sys_space.parse_params(innobase_data_file_path, true)) + { + sql_print_error("InnoDB: Unable to parse innodb_data_file_path=%s", + innobase_data_file_path); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL); + srv_tmp_space.set_path(srv_data_home); - /* Check that interdependent parameters have sane values. 
*/ - if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) { - sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm" - " cannot be set higher than" - " innodb_max_dirty_pages_pct.\n" - "InnoDB: Setting" - " innodb_max_dirty_pages_pct_lwm to %lf\n", - srv_max_buf_pool_modified_pct); + if (!srv_tmp_space.parse_params(innobase_temp_data_file_path, false)) + { + sql_print_error("InnoDB: Unable to parse innodb_temp_data_file_path=%s", + innobase_temp_data_file_path); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct; - } + /* Perform all sanity check before we take action of deleting files*/ + if (srv_sys_space.intersection(&srv_tmp_space)) + { + sql_print_error("innodb_temporary and innodb_system" + " file names seem to be the same."); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) { + srv_sys_space.normalize_size(); + srv_tmp_space.normalize_size(); - if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) { - /* Avoid overflow. */ - srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT; - } else { - /* The user has not set the value. We should - set it based on innodb_io_capacity. */ - srv_max_io_capacity = - ut_max(2 * srv_io_capacity, 2000UL); - } + /* ------------ UNDO tablespaces files ---------------------*/ + if (!srv_undo_dir) + srv_undo_dir= const_cast(fil_path_to_mysql_datadir); - } else if (srv_max_io_capacity < srv_io_capacity) { - sql_print_warning("InnoDB: innodb_io_capacity" - " cannot be set higher than" - " innodb_io_capacity_max." 
- "Setting innodb_io_capacity=%lu", - srv_max_io_capacity); + if (strchr(srv_undo_dir, ';')) + { + sql_print_error("syntax error in innodb_undo_directory"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - srv_io_capacity = srv_max_io_capacity; - } + if (!srv_log_group_home_dir) + srv_log_group_home_dir= const_cast(fil_path_to_mysql_datadir); - if (UNIV_PAGE_SIZE_DEF != srv_page_size) { - ib::info() << "innodb_page_size=" << srv_page_size; + if (strchr(srv_log_group_home_dir, ';')) + { + sql_print_error("syntax error in innodb_log_group_home_dir"); + DBUG_RETURN(HA_ERR_INITIALIZATION); + } - srv_max_undo_log_size = std::max( - srv_max_undo_log_size, - ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) - << srv_page_size_shift); - } + DBUG_ASSERT(innodb_change_buffering <= IBUF_USE_ALL); - if (srv_log_write_ahead_size > srv_page_size) { - srv_log_write_ahead_size = srv_page_size; - } else { - ulong srv_log_write_ahead_size_tmp = OS_FILE_LOG_BLOCK_SIZE; + /* Check that interdependent parameters have sane values. */ + if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) + { + sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm" + " cannot be set higher than" + " innodb_max_dirty_pages_pct.\n" + "InnoDB: Setting" + " innodb_max_dirty_pages_pct_lwm to %lf\n", + srv_max_buf_pool_modified_pct); + srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct; + } - while (srv_log_write_ahead_size_tmp - < srv_log_write_ahead_size) { - srv_log_write_ahead_size_tmp - = srv_log_write_ahead_size_tmp * 2; - } - if (srv_log_write_ahead_size_tmp - != srv_log_write_ahead_size) { - srv_log_write_ahead_size - = srv_log_write_ahead_size_tmp / 2; - } - } + if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) + { + if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) + /* Avoid overflow. */ + srv_max_io_capacity= SRV_MAX_IO_CAPACITY_LIMIT; + else + /* The user has not set the value. We should set it based on + innodb_io_capacity. 
*/ + srv_max_io_capacity= std::max(2 * srv_io_capacity, 2000UL); + } + else if (srv_max_io_capacity < srv_io_capacity) + { + sql_print_warning("InnoDB: innodb_io_capacity cannot be set higher than" + " innodb_io_capacity_max." + "Setting innodb_io_capacity=%lu", srv_max_io_capacity); + srv_io_capacity= srv_max_io_capacity; + } - srv_buf_pool_size = ulint(innobase_buffer_pool_size); + if (UNIV_PAGE_SIZE_DEF != srv_page_size) + { + sql_print_information("InnoDB: innodb_page_size=%lu", srv_page_size); + srv_max_undo_log_size= + std::max(srv_max_undo_log_size, + ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) << + srv_page_size_shift); + } - if (innobase_open_files < 10) { - innobase_open_files = 300; - if (srv_file_per_table && tc_size > 300 && tc_size < open_files_limit) { - innobase_open_files = tc_size; - } - } + if (innobase_open_files < 10) + innobase_open_files= (srv_file_per_table && tc_size > 300 && + tc_size < open_files_limit) + ? tc_size + : 300; - if (innobase_open_files > open_files_limit) { - ib::warn() << "innodb_open_files " << innobase_open_files - << " should not be greater" - << " than the open_files_limit " << open_files_limit; - if (innobase_open_files > tc_size) { - innobase_open_files = tc_size; - } - } + if (innobase_open_files > open_files_limit) + { + sql_print_warning("InnoDB: innodb_open_files %lu" + " should not be greater than the open_files_limit %lu", + innobase_open_files, open_files_limit); + if (innobase_open_files > tc_size) + innobase_open_files= tc_size; + } - ulint min_open_files_limit = srv_undo_tablespaces - + srv_sys_space.m_files.size() - + srv_tmp_space.m_files.size() + 1; - if (min_open_files_limit > innobase_open_files) { - sql_print_warning( - "InnoDB: innodb_open_files=%lu is not greater " - "than the number of system tablespace files, " - "temporary tablespace files, " - "innodb_undo_tablespaces=%lu; adjusting " - "to innodb_open_files=%zu", - innobase_open_files, srv_undo_tablespaces, - min_open_files_limit); - 
innobase_open_files = (ulong) min_open_files_limit; - } + const size_t min_open_files_limit= srv_undo_tablespaces + + srv_sys_space.m_files.size() + srv_tmp_space.m_files.size() + 1; + if (min_open_files_limit > innobase_open_files) + { + sql_print_warning("InnoDB: innodb_open_files=%lu is not greater " + "than the number of system tablespace files, " + "temporary tablespace files, " + "innodb_undo_tablespaces=%lu; adjusting " + "to innodb_open_files=%zu", + innobase_open_files, srv_undo_tablespaces, + min_open_files_limit); + innobase_open_files= ulong(min_open_files_limit); + } - srv_max_n_open_files = innobase_open_files; - srv_innodb_status = (ibool) innobase_create_status_file; + srv_max_n_open_files= innobase_open_files; + srv_innodb_status = (ibool) innobase_create_status_file; - srv_print_verbose_log = mysqld_embedded ? 0 : 1; + srv_print_verbose_log= !mysqld_embedded; - /* Round up fts_sort_pll_degree to nearest power of 2 number */ - for (num_pll_degree = 1; - num_pll_degree < fts_sort_pll_degree; - num_pll_degree <<= 1) { + if (!ut_is_2pow(fts_sort_pll_degree)) + { + ulong n; + for (n= 1; n < fts_sort_pll_degree; n<<= 1) {} + fts_sort_pll_degree= n; + } - /* No op */ - } + if (srv_log_write_ahead_size > srv_page_size) + srv_log_write_ahead_size= srv_page_size; + else + { + ulong srv_log_write_ahead_size_tmp= OS_FILE_LOG_BLOCK_SIZE; - fts_sort_pll_degree = num_pll_degree; + while (srv_log_write_ahead_size_tmp < srv_log_write_ahead_size) + srv_log_write_ahead_size_tmp*= 2; - /* Store the default charset-collation number of this MySQL - installation */ + if (srv_log_write_ahead_size_tmp != srv_log_write_ahead_size) + srv_log_write_ahead_size= srv_log_write_ahead_size_tmp / 2; + } - data_mysql_default_charset_coll = (ulint) default_charset_info->number; + /* Store the default charset-collation number of this installation */ + data_mysql_default_charset_coll = (ulint) default_charset_info->number; #ifdef HAVE_FCNTL_DIRECT - if (srv_use_atomic_writes && 
my_may_have_atomic_write) { - /* - Force O_DIRECT on Unixes (on Windows writes are always - unbuffered) - */ - switch (srv_file_flush_method) { - case SRV_O_DIRECT: - case SRV_O_DIRECT_NO_FSYNC: - break; - default: - srv_file_flush_method = SRV_O_DIRECT; - fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n"); - } - } + if (srv_use_atomic_writes && my_may_have_atomic_write) + { + /* Force O_DIRECT on Unixes (on Windows writes are always unbuffered) */ + switch (srv_file_flush_method) { + case SRV_O_DIRECT: + case SRV_O_DIRECT_NO_FSYNC: + break; + default: + srv_file_flush_method= SRV_O_DIRECT; + fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n"); + } + } #endif #if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32 - /* Currently native AIO is supported only on windows and linux - and that also when the support is compiled in. In all other - cases, we ignore the setting of innodb_use_native_aio. */ - srv_use_native_aio = FALSE; + /* Currently native AIO is supported only on windows and linux + and that also when the support is compiled in. In all other + cases, we ignore the setting of innodb_use_native_aio. 
*/ + srv_use_native_aio= FALSE; #endif #ifdef _WIN32 - switch (srv_file_flush_method) { - case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */: - srv_file_flush_method = SRV_ALL_O_DIRECT_FSYNC; - break; - case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */: - srv_file_flush_method = SRV_FSYNC; - break; - default: - ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC); - } + switch (srv_file_flush_method) { + case SRV_ALL_O_DIRECT_FSYNC + 1 /* "async_unbuffered"="unbuffered" */: + srv_file_flush_method= SRV_ALL_O_DIRECT_FSYNC; + break; + case SRV_ALL_O_DIRECT_FSYNC + 2 /* "normal"="fsync" */: + srv_file_flush_method= SRV_FSYNC; + break; + default: + ut_ad(srv_file_flush_method <= SRV_ALL_O_DIRECT_FSYNC); + } #else - ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC); + ut_ad(srv_file_flush_method <= SRV_O_DIRECT_NO_FSYNC); #endif - innodb_buffer_pool_size_init(); - - srv_lock_table_size = 5 * (srv_buf_pool_size >> srv_page_size_shift); - DBUG_RETURN(0); + DBUG_RETURN(0); } /** Initialize the InnoDB storage engine plugin. @@ -17466,27 +17396,6 @@ innodb_stopword_table_validate( return(ret); } -extern void buf_resize_start(); - -/** Update the system variable innodb_buffer_pool_size using the "saved" -value. This function is registered as a callback with MySQL. 
-@param[in] save immediate result from check function */ -static -void -innodb_buffer_pool_size_update(THD*,st_mysql_sys_var*,void*, const void* save) -{ - longlong in_val = *static_cast(save); - - snprintf(export_vars.innodb_buffer_pool_resize_status, - sizeof(export_vars.innodb_buffer_pool_resize_status), - "Requested to resize buffer pool."); - - buf_resize_start(); - - ib::info() << export_vars.innodb_buffer_pool_resize_status - << " (new size: " << in_val << " bytes)"; -} - /** The latest assigned innodb_ft_aux_table name */ static char* innodb_ft_aux_table; @@ -19102,11 +19011,12 @@ static MYSQL_SYSVAR_UINT(autoextend_increment, "Data file autoextend increment in megabytes", NULL, NULL, 64, 1, 1000, 0); -static MYSQL_SYSVAR_ULONG(buffer_pool_chunk_size, srv_buf_pool_chunk_unit, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Size of a single memory chunk" - " for resizing buffer pool. Online buffer pool resizing happens" - " at this granularity. 0 means disable resizing buffer pool.", +static size_t innodb_buffer_pool_chunk_size; + +static MYSQL_SYSVAR_SIZE_T(buffer_pool_chunk_size, + innodb_buffer_pool_chunk_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_DEPRECATED, + "Deprecated parameter with no effect", NULL, NULL, 128 * 1024 * 1024, 1024 * 1024, LONG_MAX, 1024 * 1024); @@ -19755,6 +19665,7 @@ static MYSQL_SYSVAR_BOOL(encrypt_temporary_tables, innodb_encrypt_temporary_tabl static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(autoextend_increment), MYSQL_SYSVAR(buffer_pool_size), + MYSQL_SYSVAR(buffer_pool_size_max), MYSQL_SYSVAR(buffer_pool_chunk_size), MYSQL_SYSVAR(buffer_pool_filename), MYSQL_SYSVAR(buffer_pool_dump_now), @@ -20841,90 +20752,6 @@ innobase_convert_to_system_charset( cs2, to, static_cast(len), errors))); } -/** Validate the requested buffer pool size. Also, reserve the necessary -memory needed for buffer pool resize. 
-@param[in] thd thread handle -@param[out] save immediate result for update function -@param[in] value incoming string -@return 0 on success, 1 on failure. -*/ -static -int -innodb_buffer_pool_size_validate( - THD* thd, - st_mysql_sys_var*, - void* save, - struct st_mysql_value* value) -{ - longlong intbuf; - - value->val_int(value, &intbuf); - - if (static_cast(intbuf) < MYSQL_SYSVAR_NAME(buffer_pool_size).min_val) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "innodb_buffer_pool_size must be at least" - " %lld for innodb_page_size=%lu", - MYSQL_SYSVAR_NAME(buffer_pool_size).min_val, - srv_page_size); - return(1); - } - - if (!srv_was_started) { - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "Cannot update innodb_buffer_pool_size," - " because InnoDB is not started."); - return(1); - } - - mysql_mutex_lock(&buf_pool.mutex); - - if (srv_buf_pool_old_size != srv_buf_pool_size) { - mysql_mutex_unlock(&buf_pool.mutex); - my_printf_error(ER_WRONG_ARGUMENTS, - "Another buffer pool resize is already in progress.", MYF(0)); - return(1); - } - - ulint requested_buf_pool_size = buf_pool_size_align(ulint(intbuf)); - - *static_cast(save) = requested_buf_pool_size; - - if (srv_buf_pool_size == ulint(intbuf)) { - mysql_mutex_unlock(&buf_pool.mutex); - /* nothing to do */ - return(0); - } - - if (srv_buf_pool_size == requested_buf_pool_size) { - mysql_mutex_unlock(&buf_pool.mutex); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_WRONG_ARGUMENTS, - "innodb_buffer_pool_size must be at least" - " innodb_buffer_pool_chunk_size=%lu", - srv_buf_pool_chunk_unit); - /* nothing to do */ - return(0); - } - - srv_buf_pool_size = requested_buf_pool_size; - mysql_mutex_unlock(&buf_pool.mutex); - - if (intbuf != static_cast(requested_buf_pool_size)) { - char buf[64]; - int len = 64; - value->val_str(value, buf, &len); - push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_TRUNCATED_WRONG_VALUE, 
- "Truncated incorrect %-.32s value: '%-.128s'", - mysql_sysvar_buffer_pool_size.name, - value->val_str(value, buf, &len)); - } - - return(0); -} - /*************************************************************//** Check for a valid value of innobase_compression_algorithm. @return 0 for valid innodb_compression_algorithm. */ @@ -21272,22 +21099,6 @@ void ins_node_t::vers_update_end(row_prebuilt_t *prebuilt, bool history_row) mem_heap_free(local_heap); } -/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, -if needed. -@param[in] size size in bytes -@return aligned size */ -ulint buf_pool_size_align(ulint size) noexcept -{ - const ulong m = srv_buf_pool_chunk_unit; - size = ut_max((size_t) size, (size_t) MYSQL_SYSVAR_NAME(buffer_pool_size).min_val); - - if (size % m == 0) { - return(size); - } else { - return (ulint)((size / m + 1) * m); - } -} - /** Adjust the persistent statistics after rebuilding ALTER TABLE. Remove statistics for dropped indexes, add statistics for created indexes and rename statistics for renamed indexes. diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 6385f31fe6298..fc72ed29e6f15 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -3562,7 +3562,7 @@ static int i_s_innodb_stats_fill(THD *thd, TABLE_LIST * tables, Item *) DBUG_RETURN(0); } - buf_stats_get_pool_info(&info); + buf_pool.get_info(&info); table = tables->table; @@ -4122,87 +4122,37 @@ and fetch information to information schema tables: INNODB_BUFFER_PAGE. 
@return 0 on success, 1 on failure */ static int i_s_innodb_buffer_page_fill(THD *thd, TABLE_LIST *tables, Item *) { - int status = 0; - mem_heap_t* heap; - - DBUG_ENTER("i_s_innodb_buffer_page_fill"); - - RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str); - - /* deny access to user without PROCESS privilege */ - if (check_global_access(thd, PROCESS_ACL)) { - DBUG_RETURN(0); - } - - heap = mem_heap_create(10000); - - for (ulint n = 0; - n < ut_min(buf_pool.n_chunks, buf_pool.n_chunks_new); n++) { - const buf_block_t* block; - ulint n_blocks; - buf_page_info_t* info_buffer; - ulint num_page; - ulint mem_size; - ulint chunk_size; - ulint num_to_process = 0; - ulint block_id = 0; - - /* Get buffer block of the nth chunk */ - block = buf_pool.chunks[n].blocks; - chunk_size = buf_pool.chunks[n].size; - num_page = 0; - - while (chunk_size > 0) { - /* we cache maximum MAX_BUF_INFO_CACHED number of - buffer page info */ - num_to_process = ut_min(chunk_size, - (ulint)MAX_BUF_INFO_CACHED); - - mem_size = num_to_process * sizeof(buf_page_info_t); - - /* For each chunk, we'll pre-allocate information - structures to cache the page information read from - the buffer pool. Doing so before obtain any mutex */ - info_buffer = (buf_page_info_t*) mem_heap_zalloc( - heap, mem_size); - - /* Obtain appropriate mutexes. 
Since this is diagnostic - buffer pool info printout, we are not required to - preserve the overall consistency, so we can - release mutex periodically */ - mysql_mutex_lock(&buf_pool.mutex); - - /* GO through each block in the chunk */ - for (n_blocks = num_to_process; n_blocks--; block++) { - i_s_innodb_buffer_page_get_info( - &block->page, block_id, - info_buffer + num_page); - block_id++; - num_page++; - } - - mysql_mutex_unlock(&buf_pool.mutex); - - /* Fill in information schema table with information - just collected from the buffer chunk scan */ - status = i_s_innodb_buffer_page_fill( - thd, tables, info_buffer, - num_page); - - /* If something goes wrong, break and return */ - if (status) { - break; - } - - mem_heap_empty(heap); - chunk_size -= num_to_process; - num_page = 0; - } - } + DBUG_ENTER("i_s_innodb_buffer_page_fill"); + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name.str); - mem_heap_free(heap); + /* deny access to user without PROCESS privilege */ + if (check_global_access(thd, PROCESS_ACL)) + DBUG_RETURN(0); - DBUG_RETURN(status); + int status; + buf_page_info_t *b= + static_cast(my_malloc(PSI_INSTRUMENT_ME, + MAX_BUF_INFO_CACHED * sizeof *b, + MYF(MY_WME))); + if (!b) + DBUG_RETURN(1); + for (size_t j= 0;;) + { + memset((void*) b, 0, MAX_BUF_INFO_CACHED * sizeof *b); + mysql_mutex_lock(&buf_pool.mutex); + const size_t N= buf_pool.curr_size(); + const size_t n= std::min(N, MAX_BUF_INFO_CACHED); + for (size_t i= 0; i < n && j < N; i++, j++) + i_s_innodb_buffer_page_get_info(&buf_pool.get_nth_page(j)->page, j, + &b[i]); + + mysql_mutex_unlock(&buf_pool.mutex); + status= i_s_innodb_buffer_page_fill(thd, tables, b, n); + if (status || j >= N) + break; + } + my_free(b); + DBUG_RETURN(status); } /*******************************************************************//** diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 666a083972984..df8ea0521e0b9 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ 
b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -449,8 +449,8 @@ ibuf_init_at_db_start(void) buffer pool size. Once ibuf struct is initialized this value is updated with the user supplied size by calling ibuf_max_size_update(). */ - ibuf.max_size = ((buf_pool_get_curr_size() >> srv_page_size_shift) - * CHANGE_BUFFER_DEFAULT_SIZE) / 100; + ibuf.max_size = buf_pool.curr_size() + * CHANGE_BUFFER_DEFAULT_SIZE / 100; mysql_mutex_init(ibuf_mutex_key, &ibuf_mutex, nullptr); mysql_mutex_init(ibuf_pessimistic_insert_mutex_key, @@ -507,8 +507,8 @@ ibuf_max_size_update( percentage of the buffer pool size */ { if (UNIV_UNLIKELY(!ibuf.index)) return; - ulint new_size = ((buf_pool_get_curr_size() >> srv_page_size_shift) - * new_val) / 100; + ulint new_size = std::min( + buf_pool.curr_size() * new_val / 100, uint32_t(~0U)); mysql_mutex_lock(&ibuf_mutex); ibuf.max_size = new_size; mysql_mutex_unlock(&ibuf_mutex); @@ -2065,8 +2065,7 @@ ibuf_get_merge_page_nos_func( } } - limit = ut_min(IBUF_MAX_N_PAGES_MERGED, - buf_pool_get_curr_size() / 4); + limit = std::min(IBUF_MAX_N_PAGES_MERGED, buf_pool.curr_size() / 4); first_page_no = ibuf_rec_get_page_no(mtr, rec); first_space_id = ibuf_rec_get_space(mtr, rec); diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index b75cad1018019..6d084e8c65133 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -39,12 +39,16 @@ extern mysql_pfs_key_t btr_search_latch_key; #define btr_search_sys_create() btr_search_sys.create() #define btr_search_sys_free() btr_search_sys.free() -/** Disable the adaptive hash search system and empty the index. */ -void btr_search_disable(); +/** Lazily free detached metadata when removing the last reference. */ +ATTRIBUTE_COLD void btr_search_lazy_free(dict_index_t *index); + +/** Disable the adaptive hash search system and empty the index. 
+@return whether the adaptive hash index was enabled */ +ATTRIBUTE_COLD bool btr_search_disable(); /** Enable the adaptive hash search system. @param resize whether buf_pool_t::resize() is the caller */ -void btr_search_enable(bool resize= false); +ATTRIBUTE_COLD void btr_search_enable(bool resize= false); /*********************************************************************//** Updates the search info. */ diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h index bb9994203d6b0..11c42307b4704 100644 --- a/storage/innobase/include/buf0buddy.h +++ b/storage/innobase/include/buf0buddy.h @@ -24,17 +24,13 @@ Binary buddy allocator for compressed pages Created December 2006 by Marko Makela *******************************************************/ -#ifndef buf0buddy_h -#define buf0buddy_h - +#pragma once #include "buf0types.h" /** @param[in] block size in bytes @return index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -inline -ulint -buf_buddy_get_slot(ulint size) +inline ulint buf_buddy_get_slot(ulint size) noexcept { ulint i; ulint s; @@ -53,13 +49,13 @@ buf_buddy_get_slot(ulint size) @param i index of buf_pool.zip_free[] or BUF_BUDDY_SIZES @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -byte *buf_buddy_alloc_low(ulint i, bool *lru) MY_ATTRIBUTE((malloc)); +byte *buf_buddy_alloc_low(ulint i, bool *lru) noexcept MY_ATTRIBUTE((malloc)); /** Allocate a ROW_FORMAT=COMPRESSED block. 
@param size compressed page size in bytes @param lru assigned to true if buf_pool.mutex was temporarily released @return allocated block, never NULL */ -inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) +inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) noexcept { return buf_buddy_alloc_low(buf_buddy_get_slot(size), lru); } @@ -68,24 +64,26 @@ inline byte *buf_buddy_alloc(ulint size, bool *lru= nullptr) @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] i index of buf_pool.zip_free[], or BUF_BUDDY_SIZES */ -void buf_buddy_free_low(void* buf, ulint i); +void buf_buddy_free_low(void* buf, ulint i) noexcept; /** Deallocate a block. @param[in] buf block to be freed, must not be pointed to by the buffer pool @param[in] size block size in bytes */ -inline void buf_buddy_free(void* buf, ulint size) +inline void buf_buddy_free(void* buf, ulint size) noexcept { - buf_buddy_free_low(buf, buf_buddy_get_slot(size)); + buf_buddy_free_low(buf, buf_buddy_get_slot(size)); } -/** Try to reallocate a block. -@param[in] buf block to be reallocated, must be pointed -to by the buffer pool -@param[in] size block size, up to srv_page_size -@retval false if failed because of no free blocks. */ -bool buf_buddy_realloc(void* buf, ulint size); - -/** Combine all pairs of free buddies. */ -void buf_buddy_condense_free(); -#endif /* buf0buddy_h */ +ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull, warn_unused_result)) +/** Reallocate a ROW_FORMAT=COMPRESSED page frame during buf_pool_t::shrink(). +@param bpage page descriptor covering a ROW_FORMAT=COMPRESSED page +@param block uncompressed block for storage +@return block +@retval nullptr if the block was consumed */ +ATTRIBUTE_COLD +buf_block_t *buf_buddy_shrink(buf_page_t *bpage, buf_block_t *block) noexcept; + +/** Combine all pairs of free buddies. 
+@param size the target innodb_buffer_pool_size */ +ATTRIBUTE_COLD void buf_buddy_condense_free(size_t size) noexcept; diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 605ce772b3796..7c24d0b4db39b 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -35,13 +35,16 @@ Created 11/5/1995 Heikki Tuuri #include "assume_aligned.h" #include "buf0types.h" #ifndef UNIV_INNOCHECKSUM -#include "ut0byte.h" #include "page0types.h" #include "log0log.h" #include "srv0srv.h" #include "transactional_lock_guard.h" #include +/** The allocation granularity of innodb_buffer_pool_size */ +constexpr size_t innodb_buffer_pool_extent_size= + sizeof(size_t) < 8 ? 2 << 20 : 8 << 20; + /** @name Modes for buf_page_get_gen */ /* @{ */ #define BUF_GET 10 /*!< get always */ @@ -71,7 +74,7 @@ struct buf_pool_info_t ulint pool_size; /*!< Buffer Pool size in pages */ ulint lru_len; /*!< Length of buf_pool.LRU */ ulint old_lru_len; /*!< buf_pool.LRU_old_len */ - ulint free_list_len; /*!< Length of buf_pool.free list */ + ulint free_list_len; /*!< free + lazy_allocate_size() */ ulint flush_list_len; /*!< Length of buf_pool.flush_list */ ulint n_pend_unzip; /*!< buf_pool.n_pend_unzip, pages pending decompress */ @@ -142,10 +145,8 @@ operator<<( const page_id_t page_id); #ifndef UNIV_INNOCHECKSUM -# define buf_pool_get_curr_size() srv_buf_pool_curr_size # define buf_block_free(block) buf_pool.free_block(block) - -#define buf_page_get(ID, SIZE, LA, MTR) \ +# define buf_page_get(ID, SIZE, LA, MTR) \ buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, MTR) /** Try to buffer-fix a page. @@ -399,9 +400,6 @@ void buf_print_io( /*=========*/ FILE* file); /*!< in: file where to print */ -/** Collect buffer pool metadata. -@param[out] pool_info buffer pool metadata */ -void buf_stats_get_pool_info(buf_pool_info_t *pool_info) noexcept; /** Refresh the statistics used to print per-second averages. 
*/ void buf_refresh_io_stats() noexcept; @@ -431,12 +429,6 @@ counter value in MONITOR_MODULE_BUF_PAGE. ATTRIBUTE_COLD void buf_page_monitor(const buf_page_t &bpage, bool read) noexcept; -/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit, -if needed. -@param[in] size size in bytes -@return aligned size */ -ulint buf_pool_size_align(ulint size) noexcept; - /** Verify that post encryption checksum match with the calculated checksum. This function should be called only if tablespace contains crypt data metadata. @param[in] page page frame @@ -555,7 +547,7 @@ class buf_page_t /** buf_pool.LRU status mask in state() */ static constexpr uint32_t LRU_MASK= 7U << 29; - /** lock covering the contents of frame */ + /** lock covering the contents of frame() */ block_lock lock; /** pointer to aligned, uncompressed page frame of innodb_page_size */ byte *frame; @@ -565,8 +557,6 @@ class buf_page_t !frame && !zip.data means an active buf_pool.watch */ page_zip_des_t zip; #ifdef UNIV_DEBUG - /** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */ - bool in_zip_hash; /** whether this->LRU is in buf_pool.LRU (in_file()); protected by buf_pool.mutex */ bool in_LRU_list; @@ -580,7 +570,7 @@ class buf_page_t /** list member in one of the lists of buf_pool; protected by buf_pool.mutex or buf_pool.flush_list_mutex - state() == NOT_USED: buf_pool.free or buf_pool.withdraw + state() == NOT_USED: buf_pool.free in_file() && oldest_modification(): buf_pool.flush_list (protected by buf_pool.flush_list_mutex) @@ -621,7 +611,7 @@ class buf_page_t lock() /* not copied */, frame(b.frame), zip(b.zip), #ifdef UNIV_DEBUG - in_zip_hash(b.in_zip_hash), in_LRU_list(b.in_LRU_list), + in_LRU_list(b.in_LRU_list), in_page_hash(b.in_page_hash), in_free_list(b.in_free_list), #endif /* UNIV_DEBUG */ list(b.list), LRU(b.LRU), old(b.old), freed_page_clock(b.freed_page_clock), @@ -638,7 +628,6 @@ class buf_page_t id_= id; zip.fix= state; oldest_modification_= 0; - 
ut_d(in_zip_hash= false); ut_d(in_free_list= false); ut_d(in_LRU_list= false); ut_d(in_page_hash= false); @@ -647,6 +636,15 @@ class buf_page_t access_time= 0; } + void set_os_unused() const + { + MEM_NOACCESS(frame, srv_page_size); + } + + void set_os_used() const + { + MEM_MAKE_ADDRESSABLE(frame, srv_page_size); + } public: const page_id_t &id() const noexcept { return id_; } uint32_t state() const noexcept { return zip.fix; } @@ -888,10 +886,6 @@ struct buf_block_t{ buf_pool.page_hash can point to buf_page_t or buf_block_t */ #ifdef UNIV_DEBUG - /** whether page.list is in buf_pool.withdraw - ((state() == NOT_USED)) and the buffer pool is being shrunk; - protected by buf_pool.mutex */ - bool in_withdraw_list; /** whether unzip_LRU is in buf_pool.unzip_LRU (in_file() && frame && zip.data); protected by buf_pool.mutex */ @@ -1019,15 +1013,10 @@ struct buf_block_t{ @param state initial state() */ void initialise(const page_id_t page_id, ulint zip_size, uint32_t state) noexcept; -}; -/**********************************************************************//** -Compute the hash fold value for blocks in buf_pool.zip_hash. */ -/* @{ */ -#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift) -#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->page.frame) -#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) -/* @} */ + /** Calculate the page frame address */ + IF_DBUG(,inline) byte *frame_address() const noexcept; +}; /** A "Hazard Pointer" class used to iterate over buf_pool.LRU or buf_pool.flush_list. 
A hazard pointer is a buf_page_t pointer @@ -1195,59 +1184,62 @@ struct buf_buddy_stat_t { /** The buffer pool */ class buf_pool_t { - /** A chunk of buffers */ - struct chunk_t - { - /** number of elements in blocks[] */ - size_t size; - /** memory allocated for the page frames */ - unsigned char *mem; - /** descriptor of mem */ - ut_new_pfx_t mem_pfx; - /** array of buffer control blocks */ - buf_block_t *blocks; - - /** Map of first page frame address to chunks[] */ - using map= std::map, - ut_allocator>>; - /** Chunk map that may be under construction by buf_resize_thread() */ - static map *map_reg; - /** Current chunk map for lookup only */ - static map *map_ref; - - /** @return the memory size bytes. */ - size_t mem_size() const noexcept { return mem_pfx.m_size; } - - /** Register the chunk */ - void reg() noexcept - { map_reg->emplace(map::value_type(blocks->page.frame, this)); } - - /** Allocate a chunk of buffer frames. - @param bytes requested size - @return whether the allocation succeeded */ - inline bool create(size_t bytes) noexcept; - -#ifdef UNIV_DEBUG - /** Find a block that points to a ROW_FORMAT=COMPRESSED page - @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame - @return the block - @retval nullptr if not found */ - const buf_block_t *contains_zip(const void *data) const noexcept - { - const buf_block_t *block= blocks; - for (auto i= size; i--; block++) - if (block->page.zip.data == data) - return block; - return nullptr; - } + /** arrays of buf_block_t followed by page frames; + aligned to and repeating every innodb_buffer_pool_extent_size; + each extent comprises pages_in_extent[] blocks */ + alignas(CPU_LEVEL1_DCACHE_LINESIZE) char *memory; + /** the allocation of the above memory, possibly including some + alignment loss at the beginning */ + char *memory_unaligned; + /** the virtual address range size of memory_unaligned */ + size_t size_unaligned; +#ifdef UNIV_PFS_MEMORY + /** the "owner thread" of the buffer pool 
allocation */ + PSI_thread *owner; +#endif + /** initialized number of block descriptors */ + size_t n_blocks; + /** number of blocks that need to be freed in shrink() */ + size_t n_blocks_to_withdraw; + /** first block to withdraw in shrink() */ + const buf_page_t *first_to_withdraw; + + /** amount of memory allocated to the buffer pool and descriptors; + protected by mutex */ + Atomic_relaxed size_in_bytes; - /** Check that all blocks are in a replaceable state. - @return address of a non-free block - @retval nullptr if all freed */ - inline const buf_block_t *not_freed() const noexcept; -#endif /* UNIV_DEBUG */ - }; public: + /** The requested innodb_buffer_pool_size */ + size_t size_in_bytes_requested; + /** The maximum allowed innodb_buffer_pool_size */ + size_t size_in_bytes_max; + + /** @return the current size of the buffer pool, in bytes */ + size_t curr_pool_size() const noexcept { return size_in_bytes; } + + /** @return the current size of the buffer pool, in pages */ + TPOOL_SUPPRESS_TSAN size_t curr_size() const noexcept { return n_blocks; } + /** @return the maximum usable size of the buffer pool, in pages */ + TPOOL_SUPPRESS_TSAN size_t usable_size() const noexcept + { return n_blocks - n_blocks_to_withdraw - UT_LIST_GET_LEN(withdrawn); } + + /** Determine the used size of the buffer pool in bytes. + @param n_blocks size of the buffer pool in blocks + @return the size needed for n_blocks in bytes, for innodb_page_size */ + static size_t blocks_in_bytes(size_t n_blocks) noexcept; + +#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP) + /** Enable buffers to be dumped to core files. + + A convenience function, not called anywhere directly however + it is left available for gdb or any debugger to call + in the event that you want all of the memory to be dumped + to a core file. 
+ + @return number of errors found in madvise() calls */ + static int madvise_do_dump() noexcept; +#endif + /** Hash cell chain in page_hash_table */ struct hash_chain { @@ -1255,101 +1247,55 @@ class buf_pool_t buf_page_t *first; }; private: - /** Withdraw blocks from the buffer pool until meeting withdraw_target. - @return whether retry is needed */ - inline bool withdraw_blocks() noexcept; - - /** Determine if a pointer belongs to a buf_block_t. It can be a pointer to - the buf_block_t itself or a member of it. - @param ptr a pointer that will not be dereferenced - @return whether the ptr belongs to a buf_block_t struct */ - bool is_block_field(const void *ptr) const noexcept - { - const chunk_t *chunk= chunks; - const chunk_t *const echunk= chunk + ut_min(n_chunks, n_chunks_new); - - /* TODO: protect chunks with a mutex (the older pointer will - currently remain during resize()) */ - for (; chunk < echunk; chunk++) - if (ptr >= reinterpret_cast(chunk->blocks) && - ptr < reinterpret_cast(chunk->blocks + chunk->size)) - return true; - return false; - } + /** Determine the number of blocks in a buffer pool of a particular size. + @param size_in_bytes innodb_buffer_pool_size in bytes + @return number of buffer pool pages */ + static size_t get_n_blocks(size_t size_in_bytes) noexcept; + + /** The outcome of shrink() */ + enum shrink_status{SHRINK_DONE= -1, SHRINK_IN_PROGRESS= 0, SHRINK_ABORT}; - /** Try to reallocate a control block. - @param block control block to reallocate - @return whether the reallocation succeeded */ - inline bool realloc(buf_block_t *block) noexcept; + /** Attempt to shrink the buffer pool. + @param size requested innodb_buffer_pool_size in bytes + @retval whether the shrinking was completed */ + ATTRIBUTE_COLD shrink_status shrink(size_t size) noexcept; + + /** Finish shrinking the buffer pool. 
+ @param size the new innodb_buffer_pool_size in bytes + @param reduced how much the innodb_buffer_pool_size was reduced */ + inline void shrunk(size_t size, size_t reduced) noexcept; public: - bool is_initialised() const noexcept { return chunks != nullptr; } + bool is_initialised() const noexcept { return memory != nullptr; } /** Create the buffer pool. @return whether the creation failed */ - bool create(); + bool create() noexcept; /** Clean up after successful create() */ void close() noexcept; - /** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */ - inline void resize(); - - /** @return whether resize() is in progress */ - bool resize_in_progress() const noexcept - { - return UNIV_UNLIKELY(resizing.load(std::memory_order_relaxed)); - } - - /** @return the current size in blocks */ - size_t get_n_pages() const noexcept - { - ut_ad(is_initialised()); - size_t size= 0; - for (auto j= ut_min(n_chunks_new, n_chunks); j--; ) - size+= chunks[j].size; - return size; - } + /** Resize the buffer pool. + @param size requested innodb_buffer_pool_size in bytes + @param thd current connection */ + ATTRIBUTE_COLD void resize(size_t size, THD *thd) noexcept; - /** Determine whether a frame is intended to be withdrawn during resize(). + /** Determine whether a frame needs to be withdrawn during resize(). 
@param ptr pointer within a buf_page_t::frame + @param size size_in_bytes_requested @return whether the frame will be withdrawn */ - bool will_be_withdrawn(const byte *ptr) const noexcept + bool will_be_withdrawn(const byte *ptr, size_t size) const noexcept { - ut_ad(n_chunks_new < n_chunks); -#ifdef SAFE_MUTEX - if (resize_in_progress()) - mysql_mutex_assert_owner(&mutex); -#endif /* SAFE_MUTEX */ - - for (const chunk_t *chunk= chunks + n_chunks_new, - * const echunk= chunks + n_chunks; - chunk != echunk; chunk++) - if (ptr >= chunk->blocks->page.frame && - ptr < (chunk->blocks + chunk->size - 1)->page.frame + srv_page_size) - return true; - return false; + const char *p= reinterpret_cast(ptr); + ut_ad(!p || p >= memory); + ut_ad(p < memory + size_in_bytes_max); + return p >= memory + size; } - /** Determine whether a block is intended to be withdrawn during resize(). + /** Withdraw a block if needed in case resize() is shrinking. @param bpage buffer pool block - @return whether the frame will be withdrawn */ - bool will_be_withdrawn(const buf_page_t &bpage) const noexcept - { - ut_ad(n_chunks_new < n_chunks); -#ifdef SAFE_MUTEX - if (resize_in_progress()) - mysql_mutex_assert_owner(&mutex); -#endif /* SAFE_MUTEX */ - - for (const chunk_t *chunk= chunks + n_chunks_new, - * const echunk= chunks + n_chunks; - chunk != echunk; chunk++) - if (&bpage >= &chunk->blocks->page && - &bpage < &chunk->blocks[chunk->size].page) - return true; - return false; - } + @return whether the block was withdrawn */ + ATTRIBUTE_COLD bool withdraw(buf_page_t &bpage) noexcept; /** Release and evict a corrupted page. 
@param bpage x-latched page that was found corrupted @@ -1363,31 +1309,18 @@ class buf_pool_t #ifdef UNIV_DEBUG /** Find a block that points to a ROW_FORMAT=COMPRESSED page @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame + @param shift number of least significant address bits to ignore @return the block @retval nullptr if not found */ - const buf_block_t *contains_zip(const void *data) const noexcept - { - mysql_mutex_assert_owner(&mutex); - for (const chunk_t *chunk= chunks, * const end= chunks + n_chunks; - chunk != end; chunk++) - if (const buf_block_t *block= chunk->contains_zip(data)) - return block; - return nullptr; - } - + const buf_block_t *contains_zip(const void *data, size_t shift= 0) + const noexcept; /** Assert that all buffer pool pages are in a replaceable state */ void assert_all_freed() noexcept; #endif /* UNIV_DEBUG */ #ifdef BTR_CUR_HASH_ADAPT /** Clear the adaptive hash index on all pages in the buffer pool. */ - inline void clear_hash_index() noexcept; - - /** Get a buffer block from an adaptive hash index pointer. - This function does not return if the block is not identified. - @param ptr pointer to within a page frame - @return pointer to block, never NULL */ - inline buf_block_t *block_from_ahi(const byte *ptr) const noexcept; + void clear_hash_index() noexcept; #endif /* BTR_CUR_HASH_ADAPT */ /** @@ -1410,13 +1343,27 @@ class buf_pool_t return empty_lsn; } - /** Determine if a buffer block was created by chunk_t::create(). - @param block block descriptor (not dereferenced) - @return whether block has been created by chunk_t::create() */ - bool is_uncompressed(const buf_block_t *block) const noexcept + /** Look up the block descriptor for a page frame address. + @param ptr address within a valid page frame + @return the corresponding block descriptor */ + static buf_block_t *block_from(const void *ptr) noexcept; + + /** Access a block while holding the buffer pool mutex. 
+ @param pos position between 0 and get_n_pages() + @return the block descriptor */ + buf_block_t *get_nth_page(size_t pos) const noexcept; + +#ifdef UNIV_DEBUG + /** Determine if an object is within the curr_pool_size() + and associated with an uncompressed page. + @param ptr memory object (not dereferenced) + @return whether the object is valid in the current buffer pool */ + bool is_uncompressed_current(const void *ptr) const noexcept { - return is_block_field(reinterpret_cast(block)); + const ptrdiff_t d= static_cast(ptr) - memory; + return d >= 0 && size_t(d) < curr_pool_size(); } +#endif public: /** page_fix() mode of operation */ @@ -1448,6 +1395,16 @@ class buf_pool_t buf_block_t *page_fix(const page_id_t id) noexcept { return page_fix(id, nullptr, FIX_WAIT_READ); } + /** Validate a block descriptor. + @param b block descriptor that may be invalid after shrink() + @param latch page_hash latch for id + @param id page identifier + @return b->page.fix() if b->page.id() == id + @retval 0 if b is invalid */ + TRANSACTIONAL_TARGET + uint32_t page_guess(buf_block_t *b, page_hash_latch &latch, + const page_id_t id) noexcept; + /** Decompress a page and relocate the block descriptor @param b buffer-fixed compressed-only ROW_FORMAT=COMPRESSED page @param chain hash table chain for b->id().fold() @@ -1469,7 +1426,6 @@ class buf_pool_t buf_page_t *bpage= page_hash.get(page_id, chain); if (bpage >= &watch[0] && bpage < &watch[UT_ARR_SIZE(watch)]) { - ut_ad(!bpage->in_zip_hash); ut_ad(!bpage->zip.data); if (!allow_watch) bpage= nullptr; @@ -1490,7 +1446,6 @@ class buf_pool_t ut_ad(bpage.in_file()); if (&bpage < &watch[0] || &bpage >= &watch[array_elements(watch)]) return false; - ut_ad(!bpage.in_zip_hash); ut_ad(!bpage.zip.data); return true; } @@ -1531,23 +1486,30 @@ class buf_pool_t inline uint32_t watch_remove(buf_page_t *w, hash_chain &chain) noexcept; /** @return whether less than 1/4 of the buffer pool is available */ - TPOOL_SUPPRESS_TSAN - bool running_out() 
const noexcept - { - return !recv_recovery_is_on() && - UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) < - (n_chunks_new * chunks->size) / 4; - } + bool running_out() const noexcept; /** @return whether the buffer pool is running low */ bool need_LRU_eviction() const noexcept; - /** @return whether the buffer pool is shrinking */ - inline bool is_shrinking() const noexcept + /** @return number of blocks resize() needs to evict from the buffer pool */ + size_t is_shrinking() const noexcept { - return n_chunks_new < n_chunks; + mysql_mutex_assert_owner(&mutex); + return n_blocks_to_withdraw + UT_LIST_GET_LEN(withdrawn); } + /** @return number of blocks in resize() waiting to be withdrawn */ + size_t to_withdraw() const noexcept + { + mysql_mutex_assert_owner(&mutex); + return n_blocks_to_withdraw; + } + + /** @return the shrinking size of the buffer pool, in bytes + @retval 0 if resize() is not shrinking the buffer pool */ + size_t shrinking_size() const noexcept + { return is_shrinking() ? size_in_bytes_requested : 0; } + #ifdef UNIV_DEBUG /** Validate the buffer pool. */ void validate() noexcept; @@ -1564,7 +1526,6 @@ class buf_pool_t mysql_mutex_assert_owner(&mutex); ut_ad(bpage->in_LRU_list); ut_ad(bpage->in_page_hash); - ut_ad(!bpage->in_zip_hash); ut_ad(bpage->in_file()); lru_hp.adjust(bpage); lru_scan_itr.adjust(bpage); @@ -1584,26 +1545,8 @@ class buf_pool_t /** @name General fields */ /* @{ */ - ulint curr_pool_size; /*!< Current pool size in bytes */ ulint LRU_old_ratio; /*!< Reserve this much of the buffer pool for "old" blocks */ -#ifdef UNIV_DEBUG - ulint buddy_n_frames; /*!< Number of frames allocated from - the buffer pool to the buddy system */ - ulint mutex_exit_forbidden; /*!< Forbid release mutex */ -#endif - ut_allocator allocator; /*!< Allocator used for - allocating memory for the the "chunks" - member. */ - ulint n_chunks; /*!< number of buffer pool chunks */ - ulint n_chunks_new; /*!< new number of buffer pool chunks. 
- both n_chunks{,new} are protected under - mutex */ - chunk_t* chunks; /*!< buffer pool chunks */ - chunk_t* chunks_old; /*!< old buffer pool chunks to be freed - after resizing buffer pool */ - /** current pool size in pages */ - Atomic_counter curr_size; /** read-ahead request size in pages */ Atomic_counter read_ahead_area; @@ -1715,12 +1658,6 @@ class buf_pool_t /** Look up a page in a hash bucket chain. */ inline buf_page_t *get(const page_id_t id, const hash_chain &chain) const noexcept; - - /** Exclusively aqcuire all latches */ - inline void write_lock_all() noexcept; - - /** Release all latches */ - inline void write_unlock_all() noexcept; }; /** Buffer pool mutex */ @@ -1737,9 +1674,6 @@ class buf_pool_t indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */ page_hash_table page_hash; - /** map of block->frame to buf_block_t blocks that belong - to buf_buddy_alloc(); protected by buf_pool.mutex */ - hash_table_t zip_hash; /** number of pending unzip() */ Atomic_counter n_pend_unzip; @@ -1864,30 +1798,29 @@ class buf_pool_t Set whenever the free list grows, along with a broadcast of done_free. Protected by buf_pool.mutex. 
*/ Atomic_relaxed try_LRU_scan; - /** Whether we have warned to be running out of buffer pool */ - std::atomic_flag LRU_warned; /* @} */ /** @name LRU replacement algorithm fields */ /* @{ */ - UT_LIST_BASE_NODE_T(buf_page_t) free; - /*!< base node of the free - block list */ +private: + /** Whether we have warned to be running out of buffer pool; + only modified by buf_flush_page_cleaner(): + set while holding mutex, cleared while holding flush_list_mutex */ + Atomic_relaxed LRU_warned; + + /** withdrawn blocks during resize() */ + UT_LIST_BASE_NODE_T(buf_page_t) withdrawn; + +public: + /** list of blocks available for allocate() */ + UT_LIST_BASE_NODE_T(buf_page_t) free; + /** broadcast each time when the free list grows or try_LRU_scan is set; protected by mutex */ pthread_cond_t done_free; - UT_LIST_BASE_NODE_T(buf_page_t) withdraw; - /*!< base node of the withdraw - block list. It is only used during - shrinking buffer pool size, not to - reuse the blocks will be removed */ - - ulint withdraw_target;/*!< target length of withdraw - block list, when withdrawing */ - /** "hazard pointer" used during scan of LRU while doing LRU list batch. Protected by buf_pool_t::mutex. */ LRUHp lru_hp; @@ -1928,6 +1861,14 @@ class buf_pool_t /** Sentinels to detect if pages are read into the buffer pool while a delete-buffering operation is pending. Protected by mutex. */ buf_page_t watch[innodb_purge_threads_MAX + 1]; + + /** Clear LRU_warned */ + void LRU_warned_clear() noexcept + { + mysql_mutex_assert_owner(&flush_list_mutex); + LRU_warned= false; + } + /** Reserve a buffer. */ buf_tmp_buffer_t *io_buf_reserve(bool wait_for_reads) noexcept { return io_buf.reserve(wait_for_reads); } @@ -1936,6 +1877,10 @@ class buf_pool_t /** Remove a block from the flush list. */ inline void delete_from_flush_list_low(buf_page_t *bpage) noexcept; public: + /** Try to allocate a block. 
+ @return a buffer block + @retval nullptr if no blocks are available */ + buf_block_t *allocate() noexcept; /** Remove a block from flush_list. @param bpage buffer pool page */ void delete_from_flush_list(buf_page_t *bpage) noexcept; @@ -1954,6 +1899,10 @@ class buf_pool_t /** Print buffer pool flush state information. */ ATTRIBUTE_COLD void print_flush_info() const noexcept; + /** Collect buffer pool metadata. + @param pool_info buffer pool metadata */ + void get_info(buf_pool_info_t *pool_info) noexcept; + private: /** Temporary memory for page_compressed and encrypted I/O */ struct io_buf_t @@ -1970,9 +1919,6 @@ class buf_pool_t /** Reserve a buffer */ buf_tmp_buffer_t *reserve(bool wait_for_reads) noexcept; } io_buf; - - /** whether resize() is in the critical path */ - std::atomic resizing; }; /** The InnoDB buffer pool */ @@ -2121,24 +2067,6 @@ inline void buf_page_t::set_old(bool old) noexcept this->old= old; } -#ifdef UNIV_DEBUG -/** Forbid the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_forbid() do { \ - mysql_mutex_assert_owner(&buf_pool.mutex); \ - buf_pool.mutex_exit_forbidden++; \ -} while (0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow() do { \ - mysql_mutex_assert_owner(&buf_pool.mutex); \ - ut_ad(buf_pool.mutex_exit_forbidden--); \ -} while (0) -#else -/** Forbid the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_forbid() ((void) 0) -/** Allow the release of the buffer pool mutex. */ -# define buf_pool_mutex_exit_allow() ((void) 0) -#endif - /********************************************************************** Let us list the consistency conditions for different control block states. 
diff --git a/storage/innobase/include/buf0buf.inl b/storage/innobase/include/buf0buf.inl index 85bd10b5e88a6..bd66689c4b165 100644 --- a/storage/innobase/include/buf0buf.inl +++ b/storage/innobase/include/buf0buf.inl @@ -37,7 +37,7 @@ inline bool buf_page_peek_if_young(const buf_page_t *bpage) /* FIXME: bpage->freed_page_clock is 31 bits */ return((buf_pool.freed_page_clock & ((1UL << 31) - 1)) < (bpage->freed_page_clock - + (buf_pool.curr_size + + (buf_pool.curr_size() * (BUF_LRU_OLD_RATIO_DIV - buf_pool.LRU_old_ratio) / (BUF_LRU_OLD_RATIO_DIV * 4)))); } diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index ac4f36066a490..29f41adde1cfa 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -55,10 +55,6 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip) @return true if found and freed */ bool buf_LRU_scan_and_free_block(ulint limit= ULINT_UNDEFINED); -/** @return a buffer block from the buf_pool.free list -@retval NULL if the free list is empty */ -buf_block_t* buf_LRU_get_free_only(); - /** Get a block from the buf_pool.free list. If the list is empty, blocks will be moved from the end of buf_pool.LRU to buf_pool.free. diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 7195b062e702f..94e5ff26de7f4 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -282,17 +282,6 @@ extern ulong srv_log_write_ahead_size; extern my_bool srv_adaptive_flushing; extern my_bool srv_flush_sync; -/** Requested size in bytes */ -extern ulint srv_buf_pool_size; -/** Requested buffer pool chunk size. Each buffer pool instance consists -of one or more chunks. 
*/ -extern ulong srv_buf_pool_chunk_unit; -/** Previously requested size */ -extern ulint srv_buf_pool_old_size; -/** Current size as scaling factor for the other components */ -extern ulint srv_buf_pool_base_size; -/** Current size in bytes */ -extern ulint srv_buf_pool_curr_size; /** Dump this % of each buffer pool during BP dump */ extern ulong srv_buf_pool_dump_pct; #ifdef UNIV_DEBUG @@ -651,7 +640,7 @@ struct export_var_t{ #endif /* BTR_CUR_HASH_ADAPT */ char innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */ char innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */ - char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */ + char innodb_buffer_pool_resize_status[65];/*!< Buf pool resize status */ my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */ ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */ ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */ diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index fd0cd6d4ba5c2..5f1fca9e657dd 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -958,8 +958,6 @@ void log_free_check() log_check_margins(); } -extern void buf_resize_shutdown(); - /** Make a checkpoint at the latest lsn on shutdown. 
*/ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() { @@ -976,8 +974,6 @@ ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown() srv_master_timer.reset(); } - /* Wait for the end of the buffer resize task.*/ - buf_resize_shutdown(); dict_stats_shutdown(); btr_defragment_shutdown(); diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index effb3ced6735d..aa0c90029755a 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1539,34 +1539,18 @@ inline void recv_sys_t::free(const void *data) ut_ad(!ut_align_offset(data, ALIGNMENT)); mysql_mutex_assert_owner(&mutex); - /* MDEV-14481 FIXME: To prevent race condition with buf_pool.resize(), - we must acquire and hold the buffer pool mutex here. */ - ut_ad(!buf_pool.resize_in_progress()); - - auto *chunk= buf_pool.chunks; - for (auto i= buf_pool.n_chunks; i--; chunk++) + buf_block_t *block= buf_pool.block_from(data); + ut_ad(block->page.frame == page_align(data)); + ut_ad(block->page.state() == buf_page_t::MEMORY); + ut_ad(uint16_t(block->page.free_offset - 1) < srv_page_size); + ut_ad(block->page.used_records); + if (!--block->page.used_records) { - if (data < chunk->blocks->page.frame) - continue; - const size_t offs= (reinterpret_cast(data) - - chunk->blocks->page.frame) >> srv_page_size_shift; - if (offs >= chunk->size) - continue; - buf_block_t *block= &chunk->blocks[offs]; - ut_ad(block->page.frame == page_align(data)); - ut_ad(block->page.state() == buf_page_t::MEMORY); - ut_ad(uint16_t(block->page.free_offset - 1) < srv_page_size); - ut_ad(block->page.used_records); - if (!--block->page.used_records) - { - block->page.hash= nullptr; - UT_LIST_REMOVE(blocks, block); - MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); - buf_block_free(block); - } - return; + block->page.hash= nullptr; + UT_LIST_REMOVE(blocks, block); + MEM_MAKE_ADDRESSABLE(block->page.frame, srv_page_size); + buf_block_free(block); } - ut_ad(0); } @@ -2230,12 +2214,13 
@@ ATTRIBUTE_COLD void recv_sys_t::wait_for_pool(size_t pages) { mysql_mutex_unlock(&mutex); os_aio_wait_until_no_pending_reads(false); + os_aio_wait_until_no_pending_writes(false); mysql_mutex_lock(&mutex); garbage_collect(); mysql_mutex_lock(&buf_pool.mutex); - bool need_more= UT_LIST_GET_LEN(buf_pool.free) < pages; + const size_t available= UT_LIST_GET_LEN(buf_pool.free); mysql_mutex_unlock(&buf_pool.mutex); - if (need_more) + if (available < pages) buf_flush_sync_batch(recovered_lsn); } diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 145e2b04051fa..66d207dc13064 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -1459,12 +1459,13 @@ srv_mon_process_existing_counter( /* innodb_buffer_pool_pages_total */ case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL: - value = buf_pool.get_n_pages(); + case MONITOR_OVLD_BUFFER_POOL_SIZE: + value = buf_pool.curr_size(); break; /* innodb_buffer_pool_pages_misc */ case MONITOR_OVLD_BUF_POOL_PAGE_MISC: - value = buf_pool.get_n_pages() + value = buf_pool.curr_size() - UT_LIST_GET_LEN(buf_pool.LRU) - UT_LIST_GET_LEN(buf_pool.free); break; @@ -1614,10 +1615,6 @@ srv_mon_process_existing_counter( value = srv_page_size; break; - case MONITOR_OVLD_BUFFER_POOL_SIZE: - value = srv_buf_pool_size; - break; - /* innodb_rows_read */ case MONITOR_OLVD_ROW_READ: value = srv_stats.n_rows_read; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index def31335d3468..3fb148d078f2a 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -183,17 +183,6 @@ srv_printf_innodb_monitor() will request mutex acquisition with mysql_mutex_lock(), which will wait until it gets the mutex. */ #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) -/** copy of innodb_buffer_pool_size */ -ulint srv_buf_pool_size; -/** Requested buffer pool chunk size. Each buffer pool instance consists -of one or more chunks. 
*/ -ulong srv_buf_pool_chunk_unit; -/** Previously requested size */ -ulint srv_buf_pool_old_size; -/** Current size as scaling factor for the other components */ -ulint srv_buf_pool_base_size; -/** Current size in bytes */ -ulint srv_buf_pool_curr_size; /** Dump this % of each buffer pool during BP dump */ ulong srv_buf_pool_dump_pct; /** Abort load after this amount of pages */ @@ -996,6 +985,7 @@ srv_export_innodb_status(void) export_vars.innodb_buffer_pool_write_requests = srv_stats.buf_pool_write_requests; + mysql_mutex_lock(&buf_pool.mutex); export_vars.innodb_buffer_pool_bytes_data = buf_pool.stat.LRU_bytes + (UT_LIST_GET_LEN(buf_pool.unzip_LRU) @@ -1005,12 +995,21 @@ srv_export_innodb_status(void) export_vars.innodb_buffer_pool_pages_latched = buf_get_latched_pages_number(); #endif /* UNIV_DEBUG */ - export_vars.innodb_buffer_pool_pages_total = buf_pool.get_n_pages(); + export_vars.innodb_buffer_pool_pages_total = buf_pool.curr_size(); export_vars.innodb_buffer_pool_pages_misc = - buf_pool.get_n_pages() + export_vars.innodb_buffer_pool_pages_total - UT_LIST_GET_LEN(buf_pool.LRU) - UT_LIST_GET_LEN(buf_pool.free); + if (size_t shrinking = buf_pool.is_shrinking()) { + snprintf(export_vars.innodb_buffer_pool_resize_status, + sizeof export_vars.innodb_buffer_pool_resize_status, + "Withdrawing blocks. (%zu/%zu).", + buf_pool.to_withdraw(), shrinking); + } else { + export_vars.innodb_buffer_pool_resize_status[0] = '\0'; + } + mysql_mutex_unlock(&buf_pool.mutex); export_vars.innodb_max_trx_id = trx_sys.get_max_trx_id(); export_vars.innodb_history_list_length = trx_sys.history_size_approx(); diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 322f3bfca519a..1a457f85f8953 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1237,31 +1237,10 @@ dberr_t srv_start(bool create_new_db) fil_system.create(srv_file_per_table ? 
50000 : 5000); - ib::info() << "Initializing buffer pool, total size = " - << srv_buf_pool_size - << ", chunk size = " << srv_buf_pool_chunk_unit; - if (buf_pool.create()) { - ib::error() << "Cannot allocate memory for the buffer pool"; - return(srv_init_abort(DB_ERROR)); } - ib::info() << "Completed initialization of buffer pool"; - -#ifdef UNIV_DEBUG - /* We have observed deadlocks with a 5MB buffer pool but - the actual lower limit could very well be a little higher. */ - - if (srv_buf_pool_size <= 5 * 1024 * 1024) { - - ib::info() << "Small buffer pool size (" - << srv_buf_pool_size / 1024 / 1024 - << "M), the flst_validate() debug function can cause a" - << " deadlock if the buffer pool fills up."; - } -#endif /* UNIV_DEBUG */ - if (!log_sys.create()) { sql_print_error("InnoDB: Cannot allocate memory;" " too large innodb_log_buffer_size?"); @@ -1269,7 +1248,7 @@ dberr_t srv_start(bool create_new_db) } recv_sys.create(); - lock_sys.create(srv_lock_table_size); + lock_sys.create(srv_lock_table_size = 5 * buf_pool.curr_size()); srv_startup_is_before_trx_rollback_phase = true; diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc index 8ac3f93cd1557..a3a6c1f81ce8a 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -1236,9 +1236,6 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, static_cast(thd_mdl_context(thd)); ut_ad(mdl_context); - const size_t max_pages= - std::min(buf_pool.curr_size * 3 / 4, size_t{srv_purge_batch_size}); - while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) { /* Track the max {trx_id, undo_no} for truncating the @@ -1288,7 +1285,9 @@ static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd, ut_ad(!table_node->in_progress); } - if (purge_sys.n_pages_handled() >= max_pages) + const size_t size{purge_sys.n_pages_handled()}; + if (size >= size_t{srv_purge_batch_size} || + size >= buf_pool.usable_size() * 3 / 4) break; } diff --git 
a/storage/innobase/ut/ut0rnd.cc b/storage/innobase/ut/ut0rnd.cc index a2e569514cbf0..0e0e0004bb1e6 100644 --- a/storage/innobase/ut/ut0rnd.cc +++ b/storage/innobase/ut/ut0rnd.cc @@ -48,6 +48,8 @@ ut_find_prime( ulint pow2; ulint i; + ut_ad(n); + n += 100; pow2 = 1;